path: root/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
author     Alex Waterman <alexw@nvidia.com>    2016-12-20 16:55:48 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>    2017-01-09 15:33:16 -0500
commit     6df3992b60959d32c7113cb77e131a2547174f3a (patch)
tree       efbdc9e6ccd2330d5c469ca0783ecb0137da8fc4 /drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
parent     e229514bece5a109cdbfe263f6329efe987e5939 (diff)
gpu: nvgpu: Move allocators to common/mm/
Move the GPU allocators to common/mm/ since the allocators are common
code across all GPUs. Also rename the allocator code to move away from
gk20a_ prefixed structs and functions.

This caused one issue with the nvgpu_alloc() and nvgpu_free() functions.
There was a function for allocating either with kmalloc() or vmalloc()
depending on the size of the allocation. Those have now been renamed to
nvgpu_kalloc() and nvgpu_kfree().

Bug 1799159

Change-Id: Iddda92c013612bcb209847084ec85b8953002fa5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1274400
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
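
The nvgpu_kalloc()/nvgpu_kfree() pair described above follows the usual kernel pattern of choosing kmalloc() or vmalloc() based on allocation size. A minimal sketch of that pattern, assuming an illustrative PAGE_SIZE threshold and zeroing behavior (this is not the verbatim nvgpu implementation):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Small requests come from the slab; large ones from vmalloc space. */
static inline void *nvgpu_kalloc(size_t size)
{
        if (size > PAGE_SIZE)
                return vzalloc(size);
        return kzalloc(size, GFP_KERNEL);
}

/* Free with the routine that matches how the buffer was allocated. */
static inline void nvgpu_kfree(void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}
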
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c  1327
1 file changed, 0 insertions(+), 1327 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
deleted file mode 100644
index 3715e9f8..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
+++ /dev/null
@@ -1,1327 +0,0 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>

#include "mm_gk20a.h"
#include "platform_gk20a.h"
#include "gk20a_allocator.h"
#include "buddy_allocator_priv.h"

static struct kmem_cache *buddy_cache;  /* Slab cache for metadata. */

/* Some other buddy allocator functions. */
static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
                                             u64 addr);
static void balloc_coalesce(struct gk20a_buddy_allocator *a,
                            struct gk20a_buddy *b);
static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
                                   struct gk20a_fixed_alloc *falloc);

/*
 * This macro is not present in older kernels' list.h code.
 */
#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
        list_entry((ptr)->prev, type, member)
#endif

/*
 * GPU buddy allocator for various address spaces.
 *
 * Current limitations:
 *   o  A fixed allocation could potentially be made that borders PDEs with
 *      different PTE sizes. This would require that fixed buffer to have
 *      different sized PTEs for different parts of the allocation. It is
 *      probably best to just require PDE alignment for fixed address allocs.
 *
 *   o  It is currently possible to make an allocator whose buddy alignment
 *      is out of sync with the PDE block size alignment. A simple example is
 *      a 32GB address space starting at byte 1. Every buddy is shifted off by
 *      1, which means each buddy corresponds to more than one actual GPU
 *      page. The best way to fix this is probably to require PDE block size
 *      alignment for the start of the address space. At the moment all
 *      allocators are easily PDE aligned so this hasn't been a problem.
 */

/*
 * Pick a suitable maximum order for this allocator.
 *
 * Heuristic: just guess that the best max order is the largest single
 * block that will fit in the address space.
 */
static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
{
        u64 true_max_order = ilog2(a->blks);

        if (a->max_order == 0) {
                a->max_order = true_max_order;
                return;
        }

        if (a->max_order > true_max_order)
                a->max_order = true_max_order;
        if (a->max_order > GPU_BALLOC_MAX_ORDER)
                a->max_order = GPU_BALLOC_MAX_ORDER;
}
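
/*
 * Worked example (illustrative numbers, not from the driver): a 1 GB space
 * with 64 KB blocks gives a->blks = 16384, so true_max_order =
 * ilog2(16384) = 14. A caller-requested max_order of 20 would be clamped
 * to 14; a request of 0 simply adopts 14.
 */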

/*
 * Since we can only allocate in chunks of a->blk_size we need to trim off
 * any excess data that is not aligned to a->blk_size.
 */
static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
{
        a->start = ALIGN(a->base, a->blk_size);
        WARN_ON(a->start != a->base);
        a->end   = (a->base + a->length) & ~(a->blk_size - 1);
        a->count = a->end - a->start;
        a->blks  = a->count >> a->blk_shift;
}

/*
 * Pass NULL for parent if you want a top level buddy.
 */
static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
                                            struct gk20a_buddy *parent,
                                            u64 start, u64 order)
{
        struct gk20a_buddy *new_buddy;

        new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
        if (!new_buddy)
                return NULL;

        memset(new_buddy, 0, sizeof(struct gk20a_buddy));

        new_buddy->parent = parent;
        new_buddy->start = start;
        new_buddy->order = order;
        /* 64-bit shift: order may exceed the width of a plain int shift. */
        new_buddy->end = start + (1ULL << order) * a->blk_size;
        new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;

        return new_buddy;
}

static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
                                    struct gk20a_buddy *b,
                                    struct list_head *list)
{
        if (buddy_is_in_list(b)) {
                alloc_dbg(balloc_owner(a),
                          "Oops: adding added buddy (%llu:0x%llx)\n",
                          b->order, b->start);
                BUG();
        }

        /*
         * Add big PTE blocks to the tail, small to the head for GVA spaces.
         * This lets the code that checks if there are available blocks check
         * without cycling through the entire list.
         */
        if (a->flags & GPU_ALLOC_GVA_SPACE &&
            b->pte_size == gmmu_page_size_big)
                list_add_tail(&b->buddy_entry, list);
        else
                list_add(&b->buddy_entry, list);

        buddy_set_in_list(b);
}

static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
                                    struct gk20a_buddy *b)
{
        if (!buddy_is_in_list(b)) {
                alloc_dbg(balloc_owner(a),
                          "Oops: removing removed buddy (%llu:0x%llx)\n",
                          b->order, b->start);
                BUG();
        }

        list_del_init(&b->buddy_entry);
        buddy_clr_in_list(b);
}

/*
 * Add a buddy to one of the buddy lists and deal with the necessary
 * bookkeeping. Adds the buddy to the list specified by the buddy's order.
 */
static void balloc_blist_add(struct gk20a_buddy_allocator *a,
                             struct gk20a_buddy *b)
{
        __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
        a->buddy_list_len[b->order]++;
}

static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
                             struct gk20a_buddy *b)
{
        __balloc_buddy_list_rem(a, b);
        a->buddy_list_len[b->order]--;
}

static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
{
        if (len == 0)
                return 0;

        len--;
        len >>= a->blk_shift;

        return fls(len);
}
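
/*
 * Worked example (illustrative): with blk_size = 4 KB (blk_shift = 12), a
 * request of len = 20 KB computes (20480 - 1) >> 12 = 4, and fls(4) = 3,
 * so the request is served from an order-3 buddy (8 blocks = 32 KB) -- the
 * smallest power-of-two block count that covers the request.
 */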

static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
                                 u64 start, u64 end)
{
        u64 size = (end - start) >> a->blk_shift;

        if (size > 0)
                return min_t(u64, ilog2(size), a->max_order);
        else
                return GPU_BALLOC_MAX_ORDER;
}

/*
 * Initialize the buddy lists.
 */
static int balloc_init_lists(struct gk20a_buddy_allocator *a)
{
        int i;
        u64 bstart, bend, order;
        struct gk20a_buddy *buddy;

        bstart = a->start;
        bend = a->end;

        /* First make sure the lists are valid. */
        for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
                INIT_LIST_HEAD(balloc_get_order_list(a, i));

        while (bstart < bend) {
                order = __balloc_max_order_in(a, bstart, bend);

                buddy = balloc_new_buddy(a, NULL, bstart, order);
                if (!buddy)
                        goto cleanup;

                balloc_blist_add(a, buddy);
                bstart += balloc_order_to_len(a, order);
        }

        return 0;

cleanup:
        /* An order list can hold more than one buddy; drain each fully. */
        for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
                while (!list_empty(balloc_get_order_list(a, i))) {
                        buddy = list_first_entry(balloc_get_order_list(a, i),
                                        struct gk20a_buddy, buddy_entry);
                        balloc_blist_rem(a, buddy);
                        kmem_cache_free(buddy_cache, buddy);
                }
        }

        return -ENOMEM;
}

/*
 * Clean up and destroy the passed allocator.
 */
static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
{
        int i;
        struct rb_node *node;
        struct gk20a_buddy *bud;
        struct gk20a_fixed_alloc *falloc;
        struct gk20a_buddy_allocator *a = __a->priv;

        alloc_lock(__a);

        gk20a_fini_alloc_debug(__a);

        /*
         * Free the fixed allocs first.
         */
        while ((node = rb_first(&a->fixed_allocs)) != NULL) {
                falloc = container_of(node,
                                      struct gk20a_fixed_alloc, alloced_entry);

                rb_erase(node, &a->fixed_allocs);
                __balloc_do_free_fixed(a, falloc);
        }

        /*
         * And now free all outstanding allocations.
         */
        while ((node = rb_first(&a->alloced_buddies)) != NULL) {
                bud = container_of(node, struct gk20a_buddy, alloced_entry);
                balloc_free_buddy(a, bud->start);
                balloc_blist_add(a, bud);
                balloc_coalesce(a, bud);
        }

        /*
         * Now clean up the unallocated buddies.
         */
        for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
                BUG_ON(a->buddy_list_alloced[i] != 0);

                while (!list_empty(balloc_get_order_list(a, i))) {
                        bud = list_first_entry(balloc_get_order_list(a, i),
                                        struct gk20a_buddy, buddy_entry);
                        balloc_blist_rem(a, bud);
                        kmem_cache_free(buddy_cache, bud);
                }

                if (a->buddy_list_len[i] != 0) {
                        pr_info("Excess buddies!!! (%d: %llu)\n",
                                i, a->buddy_list_len[i]);
                        BUG();
                }
                if (a->buddy_list_split[i] != 0) {
                        pr_info("Excess split nodes!!! (%d: %llu)\n",
                                i, a->buddy_list_split[i]);
                        BUG();
                }
                if (a->buddy_list_alloced[i] != 0) {
                        pr_info("Excess alloced nodes!!! (%d: %llu)\n",
                                i, a->buddy_list_alloced[i]);
                        BUG();
                }
        }

        kfree(a);

        alloc_unlock(__a);
}

/*
 * Combine the passed buddy if possible. The pointer in @b may not be valid
 * after this as the buddy may be freed.
 *
 * @a must be locked.
 */
static void balloc_coalesce(struct gk20a_buddy_allocator *a,
                            struct gk20a_buddy *b)
{
        struct gk20a_buddy *parent;

        if (buddy_is_alloced(b) || buddy_is_split(b))
                return;

        /*
         * If my buddy and I are both not allocated and not split then we can
         * coalesce ourselves.
         */
        if (!b->buddy)
                return;
        if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
                return;

        parent = b->parent;

        balloc_blist_rem(a, b);
        balloc_blist_rem(a, b->buddy);

        buddy_clr_split(parent);
        a->buddy_list_split[parent->order]--;
        balloc_blist_add(a, parent);

        /*
         * Recursively coalesce as far as we can go.
         */
        balloc_coalesce(a, parent);

        /* Clean up the remains. */
        kmem_cache_free(buddy_cache, b->buddy);
        kmem_cache_free(buddy_cache, b);
}

/*
 * Split a buddy into two new buddies that are each half the size of the
 * parent buddy.
 *
 * @a must be locked.
 */
static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
                              struct gk20a_buddy *b, int pte_size)
{
        struct gk20a_buddy *left, *right;
        u64 half;

        left = balloc_new_buddy(a, b, b->start, b->order - 1);
        if (!left)
                return -ENOMEM;

        half = (b->end - b->start) / 2;

        right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
        if (!right) {
                kmem_cache_free(buddy_cache, left);
                return -ENOMEM;
        }

        buddy_set_split(b);
        a->buddy_list_split[b->order]++;

        b->left = left;
        b->right = right;
        left->buddy = right;
        right->buddy = left;
        left->parent = b;
        right->parent = b;

        /* PTE considerations. */
        if (a->flags & GPU_ALLOC_GVA_SPACE &&
            left->order <= a->pte_blk_order) {
                left->pte_size = pte_size;
                right->pte_size = pte_size;
        }

        balloc_blist_rem(a, b);
        balloc_blist_add(a, left);
        balloc_blist_add(a, right);

        return 0;
}

/*
 * Place the passed buddy into the RB tree for allocated buddies. Never fails
 * unless the passed entry is a duplicate, which is a bug.
 *
 * @a must be locked.
 */
static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
                               struct gk20a_buddy *b)
{
        struct rb_node **new = &(a->alloced_buddies.rb_node);
        struct rb_node *parent = NULL;

        while (*new) {
                struct gk20a_buddy *bud = container_of(*new,
                                struct gk20a_buddy, alloced_entry);

                parent = *new;
                if (b->start < bud->start)
                        new = &((*new)->rb_left);
                else if (b->start > bud->start)
                        new = &((*new)->rb_right);
                else
                        BUG_ON("Duplicate entries in allocated list!\n");
        }

        rb_link_node(&b->alloced_entry, parent, new);
        rb_insert_color(&b->alloced_entry, &a->alloced_buddies);

        buddy_set_alloced(b);
        a->buddy_list_alloced[b->order]++;
}

/*
 * Remove the passed buddy from the allocated buddy RB tree. Returns the
 * deallocated buddy for further processing.
 *
 * @a must be locked.
 */
static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
                                             u64 addr)
{
        struct rb_node *node = a->alloced_buddies.rb_node;
        struct gk20a_buddy *bud;

        while (node) {
                bud = container_of(node, struct gk20a_buddy, alloced_entry);

                if (addr < bud->start)
                        node = node->rb_left;
                else if (addr > bud->start)
                        node = node->rb_right;
                else
                        break;
        }

        if (!node)
                return NULL;

        rb_erase(node, &a->alloced_buddies);
        buddy_clr_alloced(bud);
        a->buddy_list_alloced[bud->order]--;

        return bud;
}

/*
 * Find a suitable buddy for the given order and PTE type (big or little).
 */
static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
                                               u64 order, int pte_size)
{
        struct gk20a_buddy *bud;

        if (order > a->max_order ||
            list_empty(balloc_get_order_list(a, order)))
                return NULL;

        if (a->flags & GPU_ALLOC_GVA_SPACE &&
            pte_size == gmmu_page_size_big)
                bud = list_last_entry(balloc_get_order_list(a, order),
                                      struct gk20a_buddy, buddy_entry);
        else
                bud = list_first_entry(balloc_get_order_list(a, order),
                                       struct gk20a_buddy, buddy_entry);

        if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
            bud->pte_size != pte_size)
                return NULL;

        return bud;
}

/*
 * Allocate a suitably sized buddy. If no suitable buddy exists, split higher
 * order buddies until we have a suitable buddy to allocate.
 *
 * For PDE grouping add an extra check to see if a buddy is suitable: that the
 * buddy exists in a PDE whose PTE size is reasonable.
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
                             u64 order, int pte_size)
{
        u64 split_order;
        struct gk20a_buddy *bud = NULL;

        split_order = order;
        while (split_order <= a->max_order &&
               !(bud = __balloc_find_buddy(a, split_order, pte_size)))
                split_order++;

        /* Out of memory! */
        if (!bud)
                return 0;

        while (bud->order != order) {
                if (balloc_split_buddy(a, bud, pte_size))
                        return 0; /* No mem... */
                bud = bud->left;
        }

        balloc_blist_rem(a, bud);
        balloc_alloc_buddy(a, bud);

        return bud->start;
}
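
/*
 * Example of the split cascade above (illustrative): a request for an
 * order-0 buddy when only an order-3 buddy is free first splits the order-3
 * buddy into two order-2 buddies, then the left order-2 into two order-1
 * buddies, then the left order-1 into two order-0 buddies. The left-most
 * order-0 buddy is allocated and the three right halves stay on their free
 * lists.
 */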

/*
 * See if the passed range is actually available for allocation. If so, then
 * return 1, otherwise return 0.
 *
 * TODO: Right now this uses the suboptimal approach of going through all
 * outstanding allocations and checking their base/ends. This could be better.
 */
static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
                                u64 base, u64 end)
{
        struct rb_node *node;
        struct gk20a_buddy *bud;

        node = rb_first(&a->alloced_buddies);
        if (!node)
                return 1; /* No allocs yet. */

        bud = container_of(node, struct gk20a_buddy, alloced_entry);

        while (bud->start < end) {
                /*
                 * Two half-open ranges overlap iff each starts before the
                 * other finishes; this also catches allocations that fully
                 * contain the passed range.
                 */
                if (bud->start < end && bud->end > base)
                        return 0;

                node = rb_next(node);
                if (!node)
                        break;
                bud = container_of(node, struct gk20a_buddy, alloced_entry);
        }

        return 1;
}

static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
                               struct gk20a_fixed_alloc *f)
{
        struct rb_node **new = &(a->fixed_allocs.rb_node);
        struct rb_node *parent = NULL;

        while (*new) {
                struct gk20a_fixed_alloc *falloc =
                        container_of(*new, struct gk20a_fixed_alloc,
                                     alloced_entry);

                BUG_ON(!virt_addr_valid(falloc));

                parent = *new;
                if (f->start < falloc->start)
                        new = &((*new)->rb_left);
                else if (f->start > falloc->start)
                        new = &((*new)->rb_right);
                else
                        BUG_ON("Duplicate entries in allocated list!\n");
        }

        rb_link_node(&f->alloced_entry, parent, new);
        rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
}

/*
 * Remove the passed fixed allocation from the fixed-alloc RB tree. Returns
 * the removed fixed alloc for further processing.
 *
 * @a must be locked.
 */
static struct gk20a_fixed_alloc *balloc_free_fixed(
        struct gk20a_buddy_allocator *a, u64 addr)
{
        struct rb_node *node = a->fixed_allocs.rb_node;
        struct gk20a_fixed_alloc *falloc;

        while (node) {
                falloc = container_of(node,
                                      struct gk20a_fixed_alloc, alloced_entry);

                if (addr < falloc->start)
                        node = node->rb_left;
                else if (addr > falloc->start)
                        node = node->rb_right;
                else
                        break;
        }

        if (!node)
                return NULL;

        rb_erase(node, &a->fixed_allocs);

        return falloc;
}

/*
 * Find the parent range. The parent doesn't necessarily need to exist yet as
 * an actual buddy; finding an existing parent comes later...
 */
static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
                                      u64 base, u64 order,
                                      u64 *pbase, u64 *porder)
{
        u64 base_mask;
        u64 shifted_base = balloc_base_shift(a, base);

        order++;
        base_mask = ~((a->blk_size << order) - 1);

        shifted_base &= base_mask;

        *pbase = balloc_base_unshift(a, shifted_base);
        *porder = order;
}
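
/*
 * Worked example (illustrative): with blk_size = 4 KB, the parent of an
 * order-0 buddy at shifted base 0x7000 is computed as order 1 with
 * base_mask = ~((0x1000 << 1) - 1) = ~0x1fff, so 0x7000 & ~0x1fff = 0x6000;
 * the parent is the order-1 buddy spanning [0x6000, 0x8000).
 */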

/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 */
static struct gk20a_buddy *__balloc_make_fixed_buddy(
        struct gk20a_buddy_allocator *a, u64 base, u64 order)
{
        struct gk20a_buddy *bud = NULL;
        struct list_head *order_list;
        u64 cur_order = order, cur_base = base;

        /*
         * Algo:
         *   1. Keep jumping up a buddy order until we find the real buddy
         *      that this buddy exists in.
         *   2. Then work our way down through the buddy tree until we hit a
         *      dead end.
         *   3. Start splitting buddies until we split to the one we need to
         *      make.
         */
        while (cur_order <= a->max_order) {
                int found = 0;

                order_list = balloc_get_order_list(a, cur_order);
                list_for_each_entry(bud, order_list, buddy_entry) {
                        if (bud->start == cur_base) {
                                found = 1;
                                break;
                        }
                }

                if (found)
                        break;

                __balloc_get_parent_range(a, cur_base, cur_order,
                                          &cur_base, &cur_order);
        }

        if (cur_order > a->max_order) {
                alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
                return NULL;
        }

        /* Split this buddy as necessary until we get the target buddy. */
        while (bud->start != base || bud->order != order) {
                if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
                        balloc_coalesce(a, bud);
                        return NULL;
                }

                if (base < bud->right->start)
                        bud = bud->left;
                else
                        bud = bud->right;
        }

        return bud;
}

static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
                                   struct gk20a_fixed_alloc *falloc,
                                   u64 base, u64 len)
{
        u64 shifted_base, inc_base;
        u64 align_order;

        shifted_base = balloc_base_shift(a, base);
        if (shifted_base == 0)
                align_order = __fls(len >> a->blk_shift);
        else
                align_order = min_t(u64,
                                    __ffs(shifted_base >> a->blk_shift),
                                    __fls(len >> a->blk_shift));

        if (align_order > a->max_order) {
                alloc_dbg(balloc_owner(a),
                          "Align order too big: %llu > %llu\n",
                          align_order, a->max_order);
                return 0;
        }

        /*
         * Generate a list of buddies that satisfy this allocation.
         */
        inc_base = shifted_base;
        while (inc_base < (shifted_base + len)) {
                u64 order_len = balloc_order_to_len(a, align_order);
                u64 remaining;
                struct gk20a_buddy *bud;

                bud = __balloc_make_fixed_buddy(a,
                                        balloc_base_unshift(a, inc_base),
                                        align_order);
                if (!bud) {
                        alloc_dbg(balloc_owner(a),
                                  "Fixed buddy failed: {0x%llx, %llu}!\n",
                                  balloc_base_unshift(a, inc_base),
                                  align_order);
                        goto err_and_cleanup;
                }

                balloc_blist_rem(a, bud);
                balloc_alloc_buddy(a, bud);
                __balloc_buddy_list_add(a, bud, &falloc->buddies);

                /* Bookkeeping. */
                inc_base += order_len;
                remaining = (shifted_base + len) - inc_base;
                align_order = __ffs(inc_base >> a->blk_shift);

                /* If we don't have much left, trim down align_order. */
                if (balloc_order_to_len(a, align_order) > remaining)
                        align_order = __balloc_max_order_in(a, inc_base,
                                                        inc_base + remaining);
        }

        return base;

err_and_cleanup:
        while (!list_empty(&falloc->buddies)) {
                struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
                                                           struct gk20a_buddy,
                                                           buddy_entry);

                __balloc_buddy_list_rem(a, bud);
                balloc_free_buddy(a, bud->start);
                kmem_cache_free(buddy_cache, bud);
        }

        return 0;
}

static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
                                   struct gk20a_fixed_alloc *falloc)
{
        struct gk20a_buddy *bud;

        while (!list_empty(&falloc->buddies)) {
                bud = list_first_entry(&falloc->buddies,
                                       struct gk20a_buddy,
                                       buddy_entry);
                __balloc_buddy_list_rem(a, bud);

                balloc_free_buddy(a, bud->start);
                balloc_blist_add(a, bud);
                a->bytes_freed += balloc_order_to_len(a, bud->order);

                /*
                 * Attempt to defrag the allocation.
                 */
                balloc_coalesce(a, bud);
        }

        kfree(falloc);
}

/*
 * Allocate memory from the passed allocator.
 */
static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len)
{
        u64 order, addr;
        int pte_size;
        struct gk20a_buddy_allocator *a = __a->priv;

        gk20a_alloc_trace_func();

        alloc_lock(__a);

        order = balloc_get_order(a, len);

        if (order > a->max_order) {
                alloc_unlock(__a);
                alloc_dbg(balloc_owner(a), "Alloc fail\n");
                gk20a_alloc_trace_func_done();
                return 0;
        }

        /*
         * For now pass the base address of the allocator's region to
         * __get_pte_size(). This ensures we get the right page size for
         * the alloc but we don't have to know what the real address is
         * going to be quite yet.
         *
         * TODO: once userspace supports a unified address space pass 0 for
         * the base. This will make only 'len' affect the PTE size.
         */
        if (a->flags & GPU_ALLOC_GVA_SPACE)
                pte_size = __get_pte_size(a->vm, a->base, len);
        else
                pte_size = BALLOC_PTE_SIZE_ANY;

        addr = __balloc_do_alloc(a, order, pte_size);

        if (addr) {
                a->bytes_alloced += len;
                a->bytes_alloced_real += balloc_order_to_len(a, order);
                alloc_dbg(balloc_owner(a),
                          "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
                          addr, order, len,
                          pte_size == gmmu_page_size_big   ? "big" :
                          pte_size == gmmu_page_size_small ? "small" :
                          "NA/any");
        } else {
                alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
        }

        a->alloc_made = 1;

        alloc_unlock(__a);

        gk20a_alloc_trace_func_done();
        return addr;
}

/*
 * Requires @__a to be locked. The lock stays held on both the success and
 * the failure path; the caller is responsible for dropping it.
 */
static u64 __gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
                                      u64 base, u64 len)
{
        u64 ret, real_bytes = 0;
        struct gk20a_buddy *bud;
        struct gk20a_fixed_alloc *falloc = NULL;
        struct gk20a_buddy_allocator *a = __a->priv;

        gk20a_alloc_trace_func();

        /* If base isn't aligned to an order 0 block, fail. */
        if (base & (a->blk_size - 1))
                goto fail;

        if (len == 0)
                goto fail;

        falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
        if (!falloc)
                goto fail;

        INIT_LIST_HEAD(&falloc->buddies);
        falloc->start = base;
        falloc->end = base + len;

        if (!balloc_is_range_free(a, base, base + len)) {
                alloc_dbg(balloc_owner(a),
                          "Range not free: 0x%llx -> 0x%llx\n",
                          base, base + len);
                goto fail;
        }

        ret = __balloc_do_alloc_fixed(a, falloc, base, len);
        if (!ret) {
                alloc_dbg(balloc_owner(a),
                          "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
                          base, base + len);
                goto fail;
        }

        balloc_alloc_fixed(a, falloc);

        list_for_each_entry(bud, &falloc->buddies, buddy_entry)
                real_bytes += (bud->end - bud->start);

        a->bytes_alloced += len;
        a->bytes_alloced_real += real_bytes;

        alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

        gk20a_alloc_trace_func_done();
        return base;

fail:
        /* The caller holds the lock; do not unlock it here. */
        kfree(falloc);
        gk20a_alloc_trace_func_done();
        return 0;
}

/*
 * Allocate a fixed address allocation. The address of the allocation is @base
 * and the length is @len. This is not a typical buddy allocator operation and
 * as such has a high possibility of failure if the address space is heavily
 * in use.
 *
 * Please do not use this function unless _absolutely_ necessary.
 */
static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
                                    u64 base, u64 len)
{
        u64 alloc;
        struct gk20a_buddy_allocator *a = __a->priv;

        alloc_lock(__a);
        alloc = __gk20a_balloc_fixed_buddy(__a, base, len);
        a->alloc_made = 1;
        alloc_unlock(__a);

        return alloc;
}

/*
 * Free the passed allocation.
 */
static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr)
{
        struct gk20a_buddy *bud;
        struct gk20a_fixed_alloc *falloc;
        struct gk20a_buddy_allocator *a = __a->priv;

        gk20a_alloc_trace_func();

        if (!addr) {
                gk20a_alloc_trace_func_done();
                return;
        }

        alloc_lock(__a);

        /*
         * First see if this is a fixed alloc. If not, fall back to a regular
         * buddy.
         */
        falloc = balloc_free_fixed(a, addr);
        if (falloc) {
                __balloc_do_free_fixed(a, falloc);
                goto done;
        }

        bud = balloc_free_buddy(a, addr);
        if (!bud)
                goto done;

        balloc_blist_add(a, bud);
        a->bytes_freed += balloc_order_to_len(a, bud->order);

        /*
         * Attempt to defrag the allocation.
         */
        balloc_coalesce(a, bud);

done:
        alloc_unlock(__a);
        alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
        gk20a_alloc_trace_func_done();
        return;
}

static bool gk20a_buddy_reserve_is_possible(struct gk20a_buddy_allocator *a,
                                            struct gk20a_alloc_carveout *co)
{
        struct gk20a_alloc_carveout *tmp;
        u64 co_base, co_end;

        co_base = co->base;
        co_end  = co->base + co->length;

        /*
         * Not the fastest approach but we should not have that many carveouts
         * for any reasonable allocator.
         */
        list_for_each_entry(tmp, &a->co_list, co_entry) {
                /*
                 * Standard interval-overlap test; this also rejects a new
                 * carveout that fully contains an existing one.
                 */
                if (co_base < (tmp->base + tmp->length) &&
                    co_end > tmp->base)
                        return false;
        }

        return true;
}

/*
 * Carveouts can only be reserved before any regular allocations have been
 * made.
 */
static int gk20a_buddy_reserve_co(struct gk20a_allocator *__a,
                                  struct gk20a_alloc_carveout *co)
{
        struct gk20a_buddy_allocator *a = __a->priv;
        u64 addr;
        int err = 0;

        if (co->base < a->start || (co->base + co->length) > a->end ||
            a->alloc_made)
                return -EINVAL;

        alloc_lock(__a);

        if (!gk20a_buddy_reserve_is_possible(a, co)) {
                err = -EBUSY;
                goto done;
        }

        /* Should not be possible to fail... */
        addr = __gk20a_balloc_fixed_buddy(__a, co->base, co->length);
        if (!addr) {
                err = -ENOMEM;
                pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
                goto done;
        }

        list_add(&co->co_entry, &a->co_list);

done:
        alloc_unlock(__a);
        return err;
}

/*
 * Carveouts can be released at any time.
 */
static void gk20a_buddy_release_co(struct gk20a_allocator *__a,
                                   struct gk20a_alloc_carveout *co)
{
        alloc_lock(__a);
        list_del_init(&co->co_entry);
        alloc_unlock(__a);

        /*
         * gk20a_free() dispatches to gk20a_buddy_bfree(), which takes the
         * allocator lock itself, so it must be called with the lock dropped.
         */
        gk20a_free(__a, co->base);
}

static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
{
        struct gk20a_buddy_allocator *ba = a->priv;

        return ba->length;
}

static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
{
        struct gk20a_buddy_allocator *ba = a->priv;

        return ba->start;
}

static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
{
        struct gk20a_buddy_allocator *ba = a->priv;
        int inited = ba->initialized;

        rmb();
        return inited;
}

static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
{
        struct gk20a_buddy_allocator *ba = a->priv;

        return ba->end;
}

static u64 gk20a_buddy_alloc_space(struct gk20a_allocator *a)
{
        struct gk20a_buddy_allocator *ba = a->priv;
        u64 space;

        alloc_lock(a);
        space = ba->end - ba->start -
                (ba->bytes_alloced_real - ba->bytes_freed);
        alloc_unlock(a);

        return space;
}

/*
 * Print the buddy allocator top level stats. If you pass @s as NULL then the
 * stats are printed to the kernel log. This lets this code be used for
 * debugging purposes internal to the allocator.
 */
static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
                                    struct seq_file *s, int lock)
{
        int i = 0;
        struct rb_node *node;
        struct gk20a_fixed_alloc *falloc;
        struct gk20a_alloc_carveout *tmp;
        struct gk20a_buddy_allocator *a = __a->priv;

        __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
                      a->base, a->length, a->blk_size);
        __alloc_pstat(s, __a, "Internal params:\n");
        __alloc_pstat(s, __a, "  start = 0x%llx\n", a->start);
        __alloc_pstat(s, __a, "  end   = 0x%llx\n", a->end);
        __alloc_pstat(s, __a, "  count = 0x%llx\n", a->count);
        __alloc_pstat(s, __a, "  blks  = 0x%llx\n", a->blks);
        __alloc_pstat(s, __a, "  max_order = %llu\n", a->max_order);

        if (lock)
                alloc_lock(__a);

        if (!list_empty(&a->co_list)) {
                __alloc_pstat(s, __a, "\n");
                __alloc_pstat(s, __a, "Carveouts:\n");
                list_for_each_entry(tmp, &a->co_list, co_entry)
                        __alloc_pstat(s, __a,
                                      "  CO %2d: %-20s 0x%010llx + 0x%llx\n",
                                      i++, tmp->name, tmp->base, tmp->length);
        }

        __alloc_pstat(s, __a, "\n");
        __alloc_pstat(s, __a, "Buddy blocks:\n");
        __alloc_pstat(s, __a, "  Order   Free    Alloced   Split\n");
        __alloc_pstat(s, __a, "  -----   ----    -------   -----\n");

        for (i = a->max_order; i >= 0; i--) {
                if (a->buddy_list_len[i] == 0 &&
                    a->buddy_list_alloced[i] == 0 &&
                    a->buddy_list_split[i] == 0)
                        continue;

                __alloc_pstat(s, __a, "  %3d     %-7llu %-9llu %llu\n", i,
                              a->buddy_list_len[i],
                              a->buddy_list_alloced[i],
                              a->buddy_list_split[i]);
        }

        __alloc_pstat(s, __a, "\n");

        for (node = rb_first(&a->fixed_allocs), i = 1;
             node != NULL;
             node = rb_next(node)) {
                falloc = container_of(node,
                                      struct gk20a_fixed_alloc, alloced_entry);

                __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
                              i, falloc->start, falloc->end);
        }

        __alloc_pstat(s, __a, "\n");
        __alloc_pstat(s, __a, "Bytes allocated:        %llu\n",
                      a->bytes_alloced);
        __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
                      a->bytes_alloced_real);
        __alloc_pstat(s, __a, "Bytes freed:            %llu\n",
                      a->bytes_freed);

        if (lock)
                alloc_unlock(__a);
}

static const struct gk20a_allocator_ops buddy_ops = {
        .alloc            = gk20a_buddy_balloc,
        .free             = gk20a_buddy_bfree,

        .alloc_fixed      = gk20a_balloc_fixed_buddy,
        /* .free_fixed not needed. */

        .reserve_carveout = gk20a_buddy_reserve_co,
        .release_carveout = gk20a_buddy_release_co,

        .base             = gk20a_buddy_alloc_base,
        .length           = gk20a_buddy_alloc_length,
        .end              = gk20a_buddy_alloc_end,
        .inited           = gk20a_buddy_alloc_inited,
        .space            = gk20a_buddy_alloc_space,

        .fini             = gk20a_buddy_allocator_destroy,

        .print_stats      = gk20a_buddy_print_stats,
};

/*
 * Initialize a buddy allocator. Returns 0 on success. This allocator does
 * not necessarily manage bytes. It manages distinct ranges of resources. This
 * allows the allocator to work for things like comp_tags, semaphores, etc.
 *
 * @allocator: Ptr to an allocator struct to init.
 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
 *      get PTE size for GVA spaces.
 * @name: Name of the allocator. Doesn't have to be static storage.
 * @base: The base address of the resource pool being managed.
 * @size: Number of resources in the pool.
 * @blk_size: Minimum number of resources to allocate at once. For things like
 *            semaphores this is 1. For GVA this might be as much as 64k. This
 *            corresponds to order 0. Must be a power of 2.
 * @max_order: Pick a maximum order. If you leave this as 0 the buddy
 *             allocator will try to pick a reasonable max order.
 * @flags: Extra flags necessary. See GPU_ALLOC_*.
 */
int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
                                 struct vm_gk20a *vm, const char *name,
                                 u64 base, u64 size, u64 blk_size,
                                 u64 max_order, u64 flags)
{
        int err;
        u64 pde_size;
        struct gk20a_buddy_allocator *a;

        /* blk_size must be greater than 0 and a power of 2. */
        if (blk_size == 0)
                return -EINVAL;
        if (blk_size & (blk_size - 1))
                return -EINVAL;

        if (max_order > GPU_BALLOC_MAX_ORDER)
                return -EINVAL;

        /* If this is to manage a GVA space we need a VM. */
        if (flags & GPU_ALLOC_GVA_SPACE && !vm)
                return -EINVAL;

        a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
        if (!a)
                return -ENOMEM;

        err = __gk20a_alloc_common_init(__a, name, a, false, &buddy_ops);
        if (err)
                goto fail;

        a->base = base;
        a->length = size;
        a->blk_size = blk_size;
        a->blk_shift = __ffs(blk_size);
        a->owner = __a;

        /*
         * If base is 0 then modify base to be the size of one block so that
         * we can return errors by returning addr == 0.
         */
        if (a->base == 0) {
                a->base = a->blk_size;
                a->length -= a->blk_size;
        }

        a->vm = vm;
        if (flags & GPU_ALLOC_GVA_SPACE) {
                pde_size = ((u64)vm->big_page_size) << 10;
                a->pte_blk_order = balloc_get_order(a, pde_size);
        }

        /*
         * When we have a GVA space with big_pages enabled the size and base
         * must be PDE aligned. If big_pages are not enabled then this
         * requirement is not necessary.
         */
        if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages &&
            (base & ((vm->big_page_size << 10) - 1) ||
             size & ((vm->big_page_size << 10) - 1))) {
                err = -EINVAL;
                goto fail;
        }

        a->flags = flags;
        a->max_order = max_order;

        balloc_allocator_align(a);
        balloc_compute_max_order(a);

        /* Shared buddy kmem_cache for all allocators. */
        if (!buddy_cache)
                buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
        if (!buddy_cache) {
                err = -ENOMEM;
                goto fail;
        }

        a->alloced_buddies = RB_ROOT;
        a->fixed_allocs = RB_ROOT;
        INIT_LIST_HEAD(&a->co_list);
        err = balloc_init_lists(a);
        if (err)
                goto fail;

        wmb();
        a->initialized = 1;

        gk20a_init_alloc_debug(g, __a);
        alloc_dbg(__a, "New allocator: type      buddy\n");
        alloc_dbg(__a, "               base      0x%llx\n", a->base);
        alloc_dbg(__a, "               size      0x%llx\n", a->length);
        alloc_dbg(__a, "               blk_size  0x%llx\n", a->blk_size);
        alloc_dbg(__a, "               max_order %llu\n", a->max_order);
        alloc_dbg(__a, "               flags     0x%llx\n", a->flags);

        return 0;

fail:
        kfree(a);
        return err;
}

int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a,
                               const char *name, u64 base, u64 size,
                               u64 blk_size, u64 flags)
{
        return __gk20a_buddy_allocator_init(g, a, NULL, name,
                                            base, size, blk_size, 0, flags);
}
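
For context, a hedged sketch of how a caller might drive this allocator through the ops table above. The gk20a_alloc() wrapper name is an assumption here, taken to dispatch to .alloc the same way gk20a_free() (used in gk20a_buddy_release_co() above) dispatches to .free; the sizes are illustrative:

#include <linux/sizes.h>

static int example_buddy_usage(struct gk20a *g, struct gk20a_allocator *va)
{
        u64 addr;
        int err;

        /* Manage a 4 GB range of 64 KB blocks; a max_order of 0 is
         * chosen automatically by the plain init wrapper. */
        err = gk20a_buddy_allocator_init(g, va, "buddy_example",
                                         SZ_64K, 4ULL * SZ_1G, SZ_64K, 0);
        if (err)
                return err;

        /* Grab 128 blocks (8 MB) and give them back. */
        addr = gk20a_alloc(va, 128 * SZ_64K);
        if (!addr)
                return -ENOMEM;

        gk20a_free(va, addr);
        return 0;
}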