summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c1187
1 files changed, 1187 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
new file mode 100644
index 00000000..4a1df339
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
@@ -0,0 +1,1187 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19
20#include "mm_gk20a.h"
21#include "platform_gk20a.h"
22#include "gk20a_allocator.h"
23#include "buddy_allocator_priv.h"
24
25static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
26
27/* Some other buddy allocator functions. */
28static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
29 u64 addr);
30static void balloc_coalesce(struct gk20a_buddy_allocator *a,
31 struct gk20a_buddy *b);
32static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
33 struct gk20a_fixed_alloc *falloc);
34
35/*
36 * This function is not present in older kernel's list.h code.
37 */
38#ifndef list_last_entry
39#define list_last_entry(ptr, type, member) \
40 list_entry((ptr)->prev, type, member)
41#endif
42
43/*
44 * GPU buddy allocator for various address spaces.
45 *
46 * Current limitations:
47 * o A fixed allocation could potentially be made that borders PDEs with
48 * different PTE sizes. This would require that fixed buffer to have
49 * different sized PTEs for different parts of the allocation. Probably
50 * best to just require PDE alignment for fixed address allocs.
51 *
52 * o It is currently possible to make an allocator that has a buddy alignment
53 * out of sync with the PDE block size alignment. A simple example is a
54 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
 * which means each buddy corresponds to more than one actual GPU page. The
56 * best way to fix this is probably just require PDE blocksize alignment
57 * for the start of the address space. At the moment all allocators are
58 * easily PDE aligned so this hasn't been a problem.
59 */
60
61/*
62 * Pick a suitable maximum order for this allocator.
63 *
64 * Hueristic: Just guessing that the best max order is the largest single
65 * block that will fit in the address space.
66 */
67static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
68{
69 u64 true_max_order = ilog2(a->blks);
70
71 if (a->max_order == 0) {
72 a->max_order = true_max_order;
73 return;
74 }
75
76 if (a->max_order > true_max_order)
77 a->max_order = true_max_order;
78 if (a->max_order > GPU_BALLOC_MAX_ORDER)
79 a->max_order = GPU_BALLOC_MAX_ORDER;
80}
81
82/*
83 * Since we can only allocate in chucks of a->blk_size we need to trim off
84 * any excess data that is not aligned to a->blk_size.
85 */
86static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
87{
88 a->start = ALIGN(a->base, a->blk_size);
89 WARN_ON(a->start != a->base);
90 a->end = (a->base + a->length) & ~(a->blk_size - 1);
91 a->count = a->end - a->start;
92 a->blks = a->count >> a->blk_shift;
93}
94
95/*
96 * Pass NULL for parent if you want a top level buddy.
97 */
98static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
99 struct gk20a_buddy *parent,
100 u64 start, u64 order)
101{
102 struct gk20a_buddy *new_buddy;
103
104 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
105 if (!new_buddy)
106 return NULL;
107
108 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
109
110 new_buddy->parent = parent;
111 new_buddy->start = start;
112 new_buddy->order = order;
113 new_buddy->end = start + (1 << order) * a->blk_size;
114
115 return new_buddy;
116}
117
118static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
119 struct gk20a_buddy *b,
120 struct list_head *list)
121{
122 if (buddy_is_in_list(b)) {
123 alloc_dbg(balloc_owner(a),
124 "Oops: adding added buddy (%llu:0x%llx)\n",
125 b->order, b->start);
126 BUG();
127 }
128
129 /*
130 * Add big PTE blocks to the tail, small to the head for GVA spaces.
131 * This lets the code that checks if there are available blocks check
132 * without cycling through the entire list.
133 */
134 if (a->flags & GPU_BALLOC_GVA_SPACE &&
135 b->pte_size == BALLOC_PTE_SIZE_BIG)
136 list_add_tail(&b->buddy_entry, list);
137 else
138 list_add(&b->buddy_entry, list);
139
140 buddy_set_in_list(b);
141}
142
143static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
144 struct gk20a_buddy *b)
145{
146 if (!buddy_is_in_list(b)) {
147 alloc_dbg(balloc_owner(a),
148 "Oops: removing removed buddy (%llu:0x%llx)\n",
149 b->order, b->start);
150 BUG();
151 }
152
153 list_del_init(&b->buddy_entry);
154 buddy_clr_in_list(b);
155}
156
157/*
158 * Add a buddy to one of the buddy lists and deal with the necessary
159 * book keeping. Adds the buddy to the list specified by the buddy's order.
160 */
161static void balloc_blist_add(struct gk20a_buddy_allocator *a,
162 struct gk20a_buddy *b)
163{
164 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
165 a->buddy_list_len[b->order]++;
166}
167
168static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
169 struct gk20a_buddy *b)
170{
171 __balloc_buddy_list_rem(a, b);
172 a->buddy_list_len[b->order]--;
173}
174
175static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
176{
177 if (len == 0)
178 return 0;
179
180 len--;
181 len >>= a->blk_shift;
182
183 return fls(len);
184}
185
186static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
187 u64 start, u64 end)
188{
189 u64 size = (end - start) >> a->blk_shift;
190
191 if (size > 0)
192 return min_t(u64, ilog2(size), a->max_order);
193 else
194 return GPU_BALLOC_MAX_ORDER;
195}
196
197/*
198 * Initialize the buddy lists.
199 */
200static int balloc_init_lists(struct gk20a_buddy_allocator *a)
201{
202 int i;
203 u64 bstart, bend, order;
204 struct gk20a_buddy *buddy;
205
206 bstart = a->start;
207 bend = a->end;
208
209 /* First make sure the LLs are valid. */
210 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
211 INIT_LIST_HEAD(balloc_get_order_list(a, i));
212
213 while (bstart < bend) {
214 order = __balloc_max_order_in(a, bstart, bend);
215
216 buddy = balloc_new_buddy(a, NULL, bstart, order);
217 if (!buddy)
218 goto cleanup;
219
220 balloc_blist_add(a, buddy);
221 bstart += balloc_order_to_len(a, order);
222 }
223
224 return 0;
225
226cleanup:
227 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
228 if (!list_empty(balloc_get_order_list(a, i))) {
229 buddy = list_first_entry(balloc_get_order_list(a, i),
230 struct gk20a_buddy, buddy_entry);
231 balloc_blist_rem(a, buddy);
232 kmem_cache_free(buddy_cache, buddy);
233 }
234 }
235
236 return -ENOMEM;
237}
238
/*
 * Clean up and destroy the passed allocator.
 */
static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
{
	int i;
	struct rb_node *node;
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	alloc_lock(__a);

	gk20a_fini_alloc_debug(__a);

	/*
	 * Free the fixed allocs first. __balloc_do_free_fixed() returns each
	 * constituent buddy to the free lists and coalesces, so afterwards
	 * only plain buddy allocations remain outstanding.
	 */
	while ((node = rb_first(&a->fixed_allocs)) != NULL) {
		falloc = container_of(node,
				      struct gk20a_fixed_alloc, alloced_entry);

		rb_erase(node, &a->fixed_allocs);
		__balloc_do_free_fixed(a, falloc);
	}

	/*
	 * And now free all outstanding allocations: unlink from the RB tree,
	 * put each buddy back on its free list, and merge where possible.
	 */
	while ((node = rb_first(&a->alloced_buddies)) != NULL) {
		bud = container_of(node, struct gk20a_buddy, alloced_entry);
		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		balloc_coalesce(a, bud);
	}

	/*
	 * Now clean up the unallocated buddies left on the free lists.
	 */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		/*
		 * NOTE(review): this duplicates the alloced check further
		 * down - presumably an early sanity check that the loops
		 * above freed everything. Confirm before removing either.
		 */
		BUG_ON(a->buddy_list_alloced[i] != 0);

		while (!list_empty(balloc_get_order_list(a, i))) {
			bud = list_first_entry(balloc_get_order_list(a, i),
					struct gk20a_buddy, buddy_entry);
			balloc_blist_rem(a, bud);
			kmem_cache_free(buddy_cache, bud);
		}

		/* Any non-zero counter here means the book keeping is broken. */
		if (a->buddy_list_len[i] != 0) {
			pr_info("Excess buddies!!! (%d: %llu)\n",
				i, a->buddy_list_len[i]);
			BUG();
		}
		if (a->buddy_list_split[i] != 0) {
			pr_info("Excess split nodes!!! (%d: %llu)\n",
				i, a->buddy_list_split[i]);
			BUG();
		}
		if (a->buddy_list_alloced[i] != 0) {
			pr_info("Excess alloced nodes!!! (%d: %llu)\n",
				i, a->buddy_list_alloced[i]);
			BUG();
		}
	}

	/* Only the priv struct is freed; the lock lives in __a itself. */
	kfree(a);

	alloc_unlock(__a);
}
309
/*
 * Combine the passed buddy if possible. The pointer in @b may not be valid
 * after this as the buddy may be freed.
 *
 * @a must be locked.
 */
static void balloc_coalesce(struct gk20a_buddy_allocator *a,
			    struct gk20a_buddy *b)
{
	struct gk20a_buddy *parent;

	/* Busy buddies (allocated or split) can never be merged. */
	if (buddy_is_alloced(b) || buddy_is_split(b))
		return;

	/*
	 * If both our buddy and I are both not allocated and not split then
	 * we can coalesce ourselves. Top level buddies have no sibling
	 * (b->buddy == NULL) and therefore can't merge any further.
	 */
	if (!b->buddy)
		return;
	if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
		return;

	parent = b->parent;

	/* Pull both halves off their free lists... */
	balloc_blist_rem(a, b);
	balloc_blist_rem(a, b->buddy);

	/* ...and put the (now unsplit) parent back on its own. */
	buddy_clr_split(parent);
	a->buddy_list_split[parent->order]--;
	balloc_blist_add(a, parent);

	/*
	 * Recursively coalesce as far as we can go.
	 */
	balloc_coalesce(a, parent);

	/* Clean up the remains. */
	kmem_cache_free(buddy_cache, b->buddy);
	kmem_cache_free(buddy_cache, b);
}
351
/*
 * Split a buddy into two new buddies who are 1/2 the size of the parent
 * buddy.
 *
 * @a must be locked.
 *
 * Returns 0 on success, -ENOMEM if either child allocation fails (in which
 * case @b is left untouched).
 */
static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
			      struct gk20a_buddy *b, int pte_size)
{
	struct gk20a_buddy *left, *right;
	u64 half;

	left = balloc_new_buddy(a, b, b->start, b->order - 1);
	if (!left)
		return -ENOMEM;

	half = (b->end - b->start) / 2;

	right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
	if (!right) {
		kmem_cache_free(buddy_cache, left);
		return -ENOMEM;
	}

	/* Mark @b split and account it in the per-order split stats. */
	buddy_set_split(b);
	a->buddy_list_split[b->order]++;

	/* Wire up the parent/child/sibling links. */
	b->left = left;
	b->right = right;
	left->buddy = right;
	right->buddy = left;
	left->parent = b;
	right->parent = b;

	/*
	 * PTE considerations: once at or below the PDE block order the
	 * children inherit the PTE size being allocated (GVA spaces only).
	 */
	if (a->flags & GPU_BALLOC_GVA_SPACE &&
	    left->order <= a->pte_blk_order) {
		left->pte_size = pte_size;
		right->pte_size = pte_size;
	}

	/* Replace @b on the free lists with its two halves. */
	balloc_blist_rem(a, b);
	balloc_blist_add(a, left);
	balloc_blist_add(a, right);

	return 0;
}
398
399/*
400 * Place the passed buddy into the RB tree for allocated buddies. Never fails
401 * unless the passed entry is a duplicate which is a bug.
402 *
403 * @a must be locked.
404 */
405static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
406 struct gk20a_buddy *b)
407{
408 struct rb_node **new = &(a->alloced_buddies.rb_node);
409 struct rb_node *parent = NULL;
410
411 while (*new) {
412 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
413 alloced_entry);
414
415 parent = *new;
416 if (b->start < bud->start)
417 new = &((*new)->rb_left);
418 else if (b->start > bud->start)
419 new = &((*new)->rb_right);
420 else
421 BUG_ON("Duplicate entries in allocated list!\n");
422 }
423
424 rb_link_node(&b->alloced_entry, parent, new);
425 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
426
427 buddy_set_alloced(b);
428 a->buddy_list_alloced[b->order]++;
429}
430
431/*
432 * Remove the passed buddy from the allocated buddy RB tree. Returns the
433 * deallocated buddy for further processing.
434 *
435 * @a must be locked.
436 */
437static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
438 u64 addr)
439{
440 struct rb_node *node = a->alloced_buddies.rb_node;
441 struct gk20a_buddy *bud;
442
443 while (node) {
444 bud = container_of(node, struct gk20a_buddy, alloced_entry);
445
446 if (addr < bud->start)
447 node = node->rb_left;
448 else if (addr > bud->start)
449 node = node->rb_right;
450 else
451 break;
452 }
453
454 if (!node)
455 return NULL;
456
457 rb_erase(node, &a->alloced_buddies);
458 buddy_clr_alloced(bud);
459 a->buddy_list_alloced[bud->order]--;
460
461 return bud;
462}
463
464/*
465 * Find a suitable buddy for the given order and PTE type (big or little).
466 */
467static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
468 u64 order, int pte_size)
469{
470 struct gk20a_buddy *bud;
471
472 if (order > a->max_order ||
473 list_empty(balloc_get_order_list(a, order)))
474 return NULL;
475
476 if (a->flags & GPU_BALLOC_GVA_SPACE &&
477 pte_size == BALLOC_PTE_SIZE_BIG)
478 bud = list_last_entry(balloc_get_order_list(a, order),
479 struct gk20a_buddy, buddy_entry);
480 else
481 bud = list_first_entry(balloc_get_order_list(a, order),
482 struct gk20a_buddy, buddy_entry);
483
484 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
485 bud->pte_size != pte_size)
486 return NULL;
487
488 return bud;
489}
490
/*
 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
 * order buddies until we have a suitable buddy to allocate.
 *
 * For PDE grouping add an extra check to see if a buddy is suitable: that the
 * buddy exists in a PDE who's PTE size is reasonable
 *
 * @a must be locked.
 *
 * Returns the start address of the allocated buddy, or 0 on failure (0 is
 * never a valid start address since init bumps a zero base by one block).
 */
static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
			     u64 order, int pte_size)
{
	u64 split_order;
	struct gk20a_buddy *bud = NULL;

	/* Walk up the orders until a free buddy of usable size is found. */
	split_order = order;
	while (split_order <= a->max_order &&
	       !(bud = __balloc_find_buddy(a, split_order, pte_size)))
		split_order++;

	/* Out of memory! */
	if (!bud)
		return 0;

	/* Split back down to the requested order, always taking the left half. */
	while (bud->order != order) {
		if (balloc_split_buddy(a, bud, pte_size))
			return 0; /* No mem... */
		bud = bud->left;
	}

	balloc_blist_rem(a, bud);
	balloc_alloc_buddy(a, bud);

	return bud->start;
}
526
527/*
528 * See if the passed range is actually available for allocation. If so, then
529 * return 1, otherwise return 0.
530 *
531 * TODO: Right now this uses the unoptimal approach of going through all
532 * outstanding allocations and checking their base/ends. This could be better.
533 */
534static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
535 u64 base, u64 end)
536{
537 struct rb_node *node;
538 struct gk20a_buddy *bud;
539
540 node = rb_first(&a->alloced_buddies);
541 if (!node)
542 return 1; /* No allocs yet. */
543
544 bud = container_of(node, struct gk20a_buddy, alloced_entry);
545
546 while (bud->start < end) {
547 if ((bud->start > base && bud->start < end) ||
548 (bud->end > base && bud->end < end))
549 return 0;
550
551 node = rb_next(node);
552 if (!node)
553 break;
554 bud = container_of(node, struct gk20a_buddy, alloced_entry);
555 }
556
557 return 1;
558}
559
560static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
561 struct gk20a_fixed_alloc *f)
562{
563 struct rb_node **new = &(a->fixed_allocs.rb_node);
564 struct rb_node *parent = NULL;
565
566 while (*new) {
567 struct gk20a_fixed_alloc *falloc =
568 container_of(*new, struct gk20a_fixed_alloc,
569 alloced_entry);
570
571 BUG_ON(!virt_addr_valid(falloc));
572
573 parent = *new;
574 if (f->start < falloc->start)
575 new = &((*new)->rb_left);
576 else if (f->start > falloc->start)
577 new = &((*new)->rb_right);
578 else
579 BUG_ON("Duplicate entries in allocated list!\n");
580 }
581
582 rb_link_node(&f->alloced_entry, parent, new);
583 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
584}
585
586/*
587 * Remove the passed buddy from the allocated buddy RB tree. Returns the
588 * deallocated buddy for further processing.
589 *
590 * @a must be locked.
591 */
592static struct gk20a_fixed_alloc *balloc_free_fixed(
593 struct gk20a_buddy_allocator *a, u64 addr)
594{
595 struct rb_node *node = a->fixed_allocs.rb_node;
596 struct gk20a_fixed_alloc *falloc;
597
598 while (node) {
599 falloc = container_of(node,
600 struct gk20a_fixed_alloc, alloced_entry);
601
602 if (addr < falloc->start)
603 node = node->rb_left;
604 else if (addr > falloc->start)
605 node = node->rb_right;
606 else
607 break;
608 }
609
610 if (!node)
611 return NULL;
612
613 rb_erase(node, &a->fixed_allocs);
614
615 return falloc;
616}
617
618/*
619 * Find the parent range - doesn't necessarily need the parent to actually exist
620 * as a buddy. Finding an existing parent comes later...
621 */
622static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
623 u64 base, u64 order,
624 u64 *pbase, u64 *porder)
625{
626 u64 base_mask;
627 u64 shifted_base = balloc_base_shift(a, base);
628
629 order++;
630 base_mask = ~((a->blk_size << order) - 1);
631
632 shifted_base &= base_mask;
633
634 *pbase = balloc_base_unshift(a, shifted_base);
635 *porder = order;
636}
637
/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 *
 * Returns the buddy covering exactly [base, base + 2^order blocks), or NULL
 * if no free buddy encloses that range.
 */
static struct gk20a_buddy *__balloc_make_fixed_buddy(
	struct gk20a_buddy_allocator *a, u64 base, u64 order)
{
	struct gk20a_buddy *bud = NULL;
	struct list_head *order_list;
	u64 cur_order = order, cur_base = base;

	/*
	 * Algo:
	 * 1. Keep jumping up a buddy order until we find the real buddy that
	 *    this buddy exists in.
	 * 2. Then work our way down through the buddy tree until we hit a dead
	 *    end.
	 * 3. Start splitting buddies until we split to the one we need to
	 *    make.
	 */
	while (cur_order <= a->max_order) {
		int found = 0;

		/* Scan this order's free list for an exact base match. */
		order_list = balloc_get_order_list(a, cur_order);
		list_for_each_entry(bud, order_list, buddy_entry) {
			if (bud->start == cur_base) {
				found = 1;
				break;
			}
		}

		if (found)
			break;

		/* Not free at this order; widen to the enclosing parent range. */
		__balloc_get_parent_range(a, cur_base, cur_order,
					  &cur_base, &cur_order);
	}

	if (cur_order > a->max_order) {
		alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
		return NULL;
	}

	/* Split this buddy as necessary until we get the target buddy. */
	while (bud->start != base || bud->order != order) {
		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
			/* Undo any splits made so far before bailing. */
			balloc_coalesce(a, bud);
			return NULL;
		}

		/* Descend into whichever half contains @base. */
		if (base < bud->right->start)
			bud = bud->left;
		else
			bud = bud->right;

	}

	return bud;
}
697
/*
 * Carve out the exact range [base, base + len) as a series of buddies and
 * attach them to @falloc's buddy list. Returns @base on success, 0 on
 * failure (in which case any buddies made so far are released again).
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
				   struct gk20a_fixed_alloc *falloc,
				   u64 base, u64 len)
{
	u64 shifted_base, inc_base;
	u64 align_order;

	/*
	 * Largest usable starting order: bounded by the alignment of the
	 * (shifted) base and by the requested length.
	 */
	shifted_base = balloc_base_shift(a, base);
	if (shifted_base == 0)
		align_order = __fls(len >> a->blk_shift);
	else
		align_order = min_t(u64,
				    __ffs(shifted_base >> a->blk_shift),
				    __fls(len >> a->blk_shift));

	if (align_order > a->max_order) {
		alloc_dbg(balloc_owner(a),
			  "Align order too big: %llu > %llu\n",
			  align_order, a->max_order);
		return 0;
	}

	/*
	 * Generate a list of buddies that satisfy this allocation.
	 */
	inc_base = shifted_base;
	while (inc_base < (shifted_base + len)) {
		u64 order_len = balloc_order_to_len(a, align_order);
		u64 remaining;
		struct gk20a_buddy *bud;

		bud = __balloc_make_fixed_buddy(a,
					balloc_base_unshift(a, inc_base),
					align_order);
		if (!bud) {
			alloc_dbg(balloc_owner(a),
				  "Fixed buddy failed: {0x%llx, %llu}!\n",
				  balloc_base_unshift(a, inc_base),
				  align_order);
			goto err_and_cleanup;
		}

		/* Claim the buddy and park it on the falloc's own list. */
		balloc_blist_rem(a, bud);
		balloc_alloc_buddy(a, bud);
		__balloc_buddy_list_add(a, bud, &falloc->buddies);

		/* Book keeping. */
		inc_base += order_len;
		remaining = (shifted_base + len) - inc_base;
		align_order = __ffs(inc_base >> a->blk_shift);

		/* If we don't have much left - trim down align_order. */
		if (balloc_order_to_len(a, align_order) > remaining)
			align_order = __balloc_max_order_in(a, inc_base,
							inc_base + remaining);
	}

	return base;

err_and_cleanup:
	/* Unwind: every buddy made so far goes back to the kmem cache. */
	while (!list_empty(&falloc->buddies)) {
		struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
						struct gk20a_buddy,
						buddy_entry);

		__balloc_buddy_list_rem(a, bud);
		balloc_free_buddy(a, bud->start);
		kmem_cache_free(buddy_cache, bud);
	}

	return 0;
}
770
/*
 * Return all buddies of a fixed alloc to the free lists and free the falloc
 * struct itself. The falloc must already have been removed from the fixed
 * alloc RB tree by the caller.
 *
 * @a must be locked.
 */
static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
				   struct gk20a_fixed_alloc *falloc)
{
	struct gk20a_buddy *bud;

	while (!list_empty(&falloc->buddies)) {
		bud = list_first_entry(&falloc->buddies,
				       struct gk20a_buddy,
				       buddy_entry);
		__balloc_buddy_list_rem(a, bud);

		/* Clear the alloced state and put it back on its free list. */
		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		a->bytes_freed += balloc_order_to_len(a, bud->order);

		/*
		 * Attempt to defrag the allocation.
		 */
		balloc_coalesce(a, bud);
	}

	kfree(falloc);
}
794
/*
 * Allocate memory from the passed allocator. @len is rounded up to a whole
 * number of order-0 blocks; returns the allocated address or 0 on failure.
 */
static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len)
{
	u64 order, addr;
	int pte_size;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	alloc_lock(__a);

	order = balloc_get_order(a, len);

	/* Requests larger than the biggest buddy can never succeed. */
	if (order > a->max_order) {
		alloc_unlock(__a);
		alloc_dbg(balloc_owner(a), "Alloc fail\n");
		gk20a_alloc_trace_func_done();
		return 0;
	}

	/*
	 * For now pass the base address of the allocator's region to
	 * __get_pte_size(). This ensures we get the right page size for
	 * the alloc but we don't have to know what the real address is
	 * going to be quite yet.
	 *
	 * TODO: once userspace supports a unified address space pass 0 for
	 * the base. This will make only 'len' affect the PTE size.
	 */
	if (a->flags & GPU_BALLOC_GVA_SPACE)
		pte_size = __get_pte_size(a->vm, a->base, len);
	else
		pte_size = BALLOC_PTE_SIZE_ANY;

	addr = __balloc_do_alloc(a, order, pte_size);

	if (addr) {
		/* Track both requested bytes and actually reserved bytes. */
		a->bytes_alloced += len;
		a->bytes_alloced_real += balloc_order_to_len(a, order);
		alloc_dbg(balloc_owner(a),
			  "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
			  addr, order, len,
			  pte_size == gmmu_page_size_big ? "big" :
			  pte_size == gmmu_page_size_small ? "small" :
			  "NA/any");
	} else {
		alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
	}

	alloc_unlock(__a);

	gk20a_alloc_trace_func_done();
	return addr;
}
851
/*
 * Allocate a fixed address allocation. The address of the allocation is @base
 * and the length is @len. This is not a typical buddy allocator operation and
 * as such has a high possibility of failure if the address space is heavily in
 * use.
 *
 * Please do not use this function unless _absolutely_ necessary.
 */
static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
				    u64 base, u64 len)
{
	u64 ret, real_bytes = 0;
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc = NULL;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	/* If base isn't aligned to an order 0 block, fail. */
	if (base & (a->blk_size - 1))
		goto fail;

	if (len == 0)
		goto fail;

	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
	if (!falloc)
		goto fail;

	INIT_LIST_HEAD(&falloc->buddies);
	falloc->start = base;
	falloc->end = base + len;

	alloc_lock(__a);
	if (!balloc_is_range_free(a, base, base + len)) {
		alloc_dbg(balloc_owner(a),
			  "Range not free: 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
	if (!ret) {
		alloc_dbg(balloc_owner(a),
			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	balloc_alloc_fixed(a, falloc);

	/* Sum what was actually reserved; may exceed @len due to rounding. */
	list_for_each_entry(bud, &falloc->buddies, buddy_entry)
		real_bytes += (bud->end - bud->start);

	a->bytes_alloced += len;
	a->bytes_alloced_real += real_bytes;

	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

	gk20a_alloc_trace_func_done();
	return base;

fail_unlock:
	alloc_unlock(__a);
fail:
	/* kfree(NULL) is a no-op, so the early failure paths are fine. */
	kfree(falloc);
	gk20a_alloc_trace_func_done();
	return 0;
}
922
/*
 * Free the passed allocation. Handles both fixed allocs and regular buddy
 * allocs transparently; freeing address 0 is a no-op.
 */
static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr)
{
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	/* 0 is the allocators' error return, never a real allocation. */
	if (!addr) {
		gk20a_alloc_trace_func_done();
		return;
	}

	alloc_lock(__a);

	/*
	 * First see if this is a fixed alloc. If not fall back to a regular
	 * buddy.
	 */
	falloc = balloc_free_fixed(a, addr);
	if (falloc) {
		__balloc_do_free_fixed(a, falloc);
		goto done;
	}

	/* Regular buddy alloc: unlink, return to free lists, and merge. */
	bud = balloc_free_buddy(a, addr);
	if (!bud)
		goto done;

	balloc_blist_add(a, bud);
	a->bytes_freed += balloc_order_to_len(a, bud->order);

	/*
	 * Attempt to defrag the allocation.
	 */
	balloc_coalesce(a, bud);

done:
	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
	gk20a_alloc_trace_func_done();
	return;
}
969
970static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
971{
972 struct gk20a_buddy_allocator *ba = a->priv;
973
974 return ba->length;
975}
976
977static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
978{
979 struct gk20a_buddy_allocator *ba = a->priv;
980
981 return ba->start;
982}
983
984static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
985{
986 struct gk20a_buddy_allocator *ba = a->priv;
987
988 return ba->initialized;
989}
990
991static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
992{
993 struct gk20a_buddy_allocator *ba = a->priv;
994
995 return ba->end;
996}
997
/*
 * Print the buddy allocator top level stats. If you pass @s as NULL then the
 * stats are printed to the kernel log. This lets this code be used for
 * debugging purposes internal to the allocator.
 */
static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
				    struct seq_file *s, int lock)
{
	int i;
	struct rb_node *node;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	__alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
		      a->base, a->length, a->blk_size);
	__alloc_pstat(s, __a, "Internal params:\n");
	__alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
	__alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
	__alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
	__alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
	__alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);

	__alloc_pstat(s, __a, "Buddy blocks:\n");
	__alloc_pstat(s, __a, " Order Free Alloced Split\n");
	__alloc_pstat(s, __a, " ----- ---- ------- -----\n");

	if (lock)
		alloc_lock(__a);
	/* Per-order table, highest order first; skip fully-empty rows. */
	for (i = a->max_order; i >= 0; i--) {
		if (a->buddy_list_len[i] == 0 &&
		    a->buddy_list_alloced[i] == 0 &&
		    a->buddy_list_split[i] == 0)
			continue;

		__alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
			      a->buddy_list_len[i],
			      a->buddy_list_alloced[i],
			      a->buddy_list_split[i]);
	}

	__alloc_pstat(s, __a, "\n");

	/* Enumerate outstanding fixed allocs (i is just a display counter). */
	for (node = rb_first(&a->fixed_allocs), i = 1;
	     node != NULL;
	     node = rb_next(node)) {
		falloc = container_of(node,
				      struct gk20a_fixed_alloc, alloced_entry);

		__alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
			      i, falloc->start, falloc->end);
	}

	__alloc_pstat(s, __a, "\n");
	__alloc_pstat(s, __a, "Bytes allocated: %llu\n",
		      a->bytes_alloced);
	__alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
		      a->bytes_alloced_real);
	__alloc_pstat(s, __a, "Bytes freed: %llu\n",
		      a->bytes_freed);

	if (lock)
		alloc_unlock(__a);
}
1061
/*
 * Operation table wired into the generic gk20a_allocator wrapper.
 */
static const struct gk20a_allocator_ops buddy_ops = {
	.alloc = gk20a_buddy_balloc,
	.free = gk20a_buddy_bfree,

	.alloc_fixed = gk20a_balloc_fixed_buddy,
	/* .free_fixed not needed: gk20a_buddy_bfree() detects fixed allocs. */

	.base = gk20a_buddy_alloc_base,
	.length = gk20a_buddy_alloc_length,
	.end = gk20a_buddy_alloc_end,
	.inited = gk20a_buddy_alloc_inited,

	.fini = gk20a_buddy_allocator_destroy,

	.print_stats = gk20a_buddy_print_stats,
};
1078
/*
 * Initialize a buddy allocator. Returns 0 on success. This allocator does
 * not necessarily manage bytes. It manages distinct ranges of resources. This
 * allows the allocator to work for things like comp_tags, semaphores, etc.
 *
 * @allocator: Ptr to an allocator struct to init.
 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
 *      get PTE size for GVA spaces.
 * @name: Name of the allocator. Doesn't have to be static storage.
 * @base: The base address of the resource pool being managed.
 * @size: Number of resources in the pool.
 * @blk_size: Minimum number of resources to allocate at once. For things like
 *            semaphores this is 1. For GVA this might be as much as 64k. This
 *            corresponds to order 0. Must be power of 2.
 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
 *             will try and pick a reasonable max order.
 * @flags: Extra flags necessary. See GPU_BALLOC_*.
 */
int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
				 struct vm_gk20a *vm, const char *name,
				 u64 base, u64 size, u64 blk_size,
				 u64 max_order, u64 flags)
{
	int err;
	struct gk20a_buddy_allocator *a;

	/* blk_size must be greater than 0 and a power of 2. */
	if (blk_size == 0)
		return -EINVAL;
	if (blk_size & (blk_size - 1))
		return -EINVAL;

	if (max_order > GPU_BALLOC_MAX_ORDER)
		return -EINVAL;

	/* If this is to manage a GVA space we need a VM. */
	if (flags & GPU_BALLOC_GVA_SPACE && !vm)
		return -EINVAL;

	a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
	if (err)
		goto fail;

	a->base = base;
	a->length = size;
	a->blk_size = blk_size;
	a->blk_shift = __ffs(blk_size);
	a->owner = __a;

	/*
	 * If base is 0 then modify base to be the size of one block so that
	 * we can return errors by returning addr == 0.
	 */
	if (a->base == 0) {
		a->base = a->blk_size;
		a->length -= a->blk_size;
	}

	a->vm = vm;
	if (flags & GPU_BALLOC_GVA_SPACE)
		/*
		 * Order covering one PDE block; assumes a PDE maps
		 * big_page_size << 10 bytes -- TODO confirm against the mm
		 * headers.
		 */
		a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);

	a->flags = flags;
	a->max_order = max_order;

	balloc_allocator_align(a);
	balloc_compute_max_order(a);

	/*
	 * Shared buddy kmem_cache for all allocators.
	 *
	 * NOTE(review): creation isn't guarded by any lock - this looks like
	 * it assumes allocator inits are serialized; verify at call sites.
	 */
	if (!buddy_cache)
		buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
	if (!buddy_cache) {
		err = -ENOMEM;
		goto fail;
	}

	a->alloced_buddies = RB_ROOT;
	a->fixed_allocs = RB_ROOT;
	err = balloc_init_lists(a);
	if (err)
		goto fail;

	a->initialized = 1;

	gk20a_init_alloc_debug(__a);
	alloc_dbg(__a, "New allocator: type buddy\n");
	alloc_dbg(__a, " base 0x%llx\n", a->base);
	alloc_dbg(__a, " size 0x%llx\n", a->length);
	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
	alloc_dbg(__a, " max_order %llu\n", a->max_order);
	alloc_dbg(__a, " flags 0x%llx\n", a->flags);

	return 0;

fail:
	kfree(a);
	return err;
}
1181
1182int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name,
1183 u64 base, u64 size, u64 blk_size, u64 flags)
1184{
1185 return __gk20a_buddy_allocator_init(a, NULL, name,
1186 base, size, blk_size, 0, 0);
1187}