summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.c1216
1 files changed, 15 insertions, 1201 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index f2164768..b7e9a5e4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -19,125 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21 21
22#include "mm_gk20a.h"
22#include "platform_gk20a.h" 23#include "platform_gk20a.h"
23#include "gk20a_allocator.h" 24#include "gk20a_allocator.h"
24 25
25#include "mm_gk20a.h"
26
27static struct dentry *gk20a_alloc_debugfs_root;
28
29static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
30
31u32 gk20a_alloc_tracing_on; 26u32 gk20a_alloc_tracing_on;
32 27
33#define gk20a_alloc_trace_func() \ 28static struct dentry *gk20a_alloc_debugfs_root;
34 do { \
35 if (gk20a_alloc_tracing_on) \
36 trace_printk("%s\n", __func__); \
37 } while (0)
38
39#define gk20a_alloc_trace_func_done() \
40 do { \
41 if (gk20a_alloc_tracing_on) \
42 trace_printk("%s_done\n", __func__); \
43 } while (0)
44
45/*
46 * Buddy allocator implementation.
47 */
48static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len);
49static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr);
50static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
51 u64 base, u64 len);
52static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a);
53static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a);
54static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a);
55static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a);
56
57static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a);
58static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
59 struct seq_file *s, int lock);
60
61/* Some other buddy allocator functions. */
62static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
63 u64 addr);
64static void balloc_coalesce(struct gk20a_buddy_allocator *a,
65 struct gk20a_buddy *b);
66static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
67 struct gk20a_fixed_alloc *falloc);
68
69/* Debugging. */
70static void gk20a_init_alloc_debug(struct gk20a_allocator *a);
71
72/*
73 * This function is not present in older kernel's list.h code.
74 */
75#ifndef list_last_entry
76#define list_last_entry(ptr, type, member) \
77 list_entry((ptr)->prev, type, member)
78#endif
79
80static const struct gk20a_allocator_ops buddy_ops = {
81 .alloc = gk20a_buddy_alloc,
82 .free = gk20a_buddy_free,
83
84 .alloc_fixed = gk20a_buddy_alloc_fixed,
85 /* .free_fixed not needed. */
86
87 .base = gk20a_buddy_alloc_base,
88 .length = gk20a_buddy_alloc_length,
89 .end = gk20a_buddy_alloc_end,
90 .inited = gk20a_buddy_alloc_inited,
91
92 .fini = gk20a_buddy_allocator_destroy,
93
94 .print_stats = gk20a_buddy_print_stats,
95};
96
97/*
98 * GPU buddy allocator for various address spaces.
99 *
100 * Current limitations:
101 * o A fixed allocation could potentially be made that borders PDEs with
102 * different PTE sizes. This would require that fixed buffer to have
103 * different sized PTEs for different parts of the allocation. Probably
104 * best to just require PDE alignment for fixed address allocs.
105 *
106 * o It is currently possible to make an allocator that has a buddy alignment
107 * out of sync with the PDE block size alignment. A simple example is a
108 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
109 * which means each buddy corresponf to more than one actual GPU page. The
110 * best way to fix this is probably just require PDE blocksize alignment
111 * for the start of the address space. At the moment all allocators are
112 * easily PDE aligned so this hasn't been a problem.
113 */
114
115static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
116{
117 struct gk20a_buddy_allocator *ba = a->priv;
118
119 return ba->length;
120}
121
122static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
123{
124 struct gk20a_buddy_allocator *ba = a->priv;
125
126 return ba->start;
127}
128
129static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
130{
131 struct gk20a_buddy_allocator *ba = a->priv;
132
133 return ba->inited;
134}
135static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
136{
137 struct gk20a_buddy_allocator *ba = a->priv;
138
139 return ba->end;
140}
141 29
142u64 gk20a_alloc_length(struct gk20a_allocator *a) 30u64 gk20a_alloc_length(struct gk20a_allocator *a)
143{ 31{
@@ -195,189 +83,11 @@ void gk20a_alloc_destroy(struct gk20a_allocator *a)
195} 83}
196 84
197/* 85/*
198 * Pick a suitable maximum order for this allocator.
199 *
200 * Hueristic: Just guessing that the best max order is the largest single
201 * block that will fit in the address space.
202 */
203static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
204{
205 u64 true_max_order = ilog2(a->blks);
206
207 if (a->max_order == 0) {
208 a->max_order = true_max_order;
209 return;
210 }
211
212 if (a->max_order > true_max_order)
213 a->max_order = true_max_order;
214 if (a->max_order > GPU_BALLOC_MAX_ORDER)
215 a->max_order = GPU_BALLOC_MAX_ORDER;
216}
217
218/*
219 * Since we can only allocate in chucks of a->blk_size we need to trim off
220 * any excess data that is not aligned to a->blk_size.
221 */
222static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
223{
224 a->start = ALIGN(a->base, a->blk_size);
225 WARN_ON(a->start != a->base);
226 a->end = (a->base + a->length) & ~(a->blk_size - 1);
227 a->count = a->end - a->start;
228 a->blks = a->count >> a->blk_shift;
229}
230
231/*
232 * Pass NULL for parent if you want a top level buddy.
233 */
234static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
235 struct gk20a_buddy *parent,
236 u64 start, u64 order)
237{
238 struct gk20a_buddy *new_buddy;
239
240 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
241 if (!new_buddy)
242 return NULL;
243
244 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
245
246 new_buddy->parent = parent;
247 new_buddy->start = start;
248 new_buddy->order = order;
249 new_buddy->end = start + (1 << order) * a->blk_size;
250
251 return new_buddy;
252}
253
254static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
255 struct gk20a_buddy *b,
256 struct list_head *list)
257{
258 if (buddy_is_in_list(b)) {
259 alloc_dbg(balloc_owner(a),
260 "Oops: adding added buddy (%llu:0x%llx)\n",
261 b->order, b->start);
262 BUG();
263 }
264
265 /*
266 * Add big PTE blocks to the tail, small to the head for GVA spaces.
267 * This lets the code that checks if there are available blocks check
268 * without cycling through the entire list.
269 */
270 if (a->flags & GPU_BALLOC_GVA_SPACE &&
271 b->pte_size == BALLOC_PTE_SIZE_BIG)
272 list_add_tail(&b->buddy_entry, list);
273 else
274 list_add(&b->buddy_entry, list);
275
276 buddy_set_in_list(b);
277}
278
279static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
280 struct gk20a_buddy *b)
281{
282 if (!buddy_is_in_list(b)) {
283 alloc_dbg(balloc_owner(a),
284 "Oops: removing removed buddy (%llu:0x%llx)\n",
285 b->order, b->start);
286 BUG();
287 }
288
289 list_del_init(&b->buddy_entry);
290 buddy_clr_in_list(b);
291}
292
293/*
294 * Add a buddy to one of the buddy lists and deal with the necessary
295 * book keeping. Adds the buddy to the list specified by the buddy's order.
296 */
297static void balloc_blist_add(struct gk20a_buddy_allocator *a,
298 struct gk20a_buddy *b)
299{
300 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
301 a->buddy_list_len[b->order]++;
302}
303
304static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
305 struct gk20a_buddy *b)
306{
307 __balloc_buddy_list_rem(a, b);
308 a->buddy_list_len[b->order]--;
309}
310
311static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
312{
313 if (len == 0)
314 return 0;
315
316 len--;
317 len >>= a->blk_shift;
318
319 return fls(len);
320}
321
322static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
323 u64 start, u64 end)
324{
325 u64 size = (end - start) >> a->blk_shift;
326
327 if (size > 0)
328 return min_t(u64, ilog2(size), a->max_order);
329 else
330 return GPU_BALLOC_MAX_ORDER;
331}
332
333/*
334 * Initialize the buddy lists.
335 */
336static int balloc_init_lists(struct gk20a_buddy_allocator *a)
337{
338 int i;
339 u64 bstart, bend, order;
340 struct gk20a_buddy *buddy;
341
342 bstart = a->start;
343 bend = a->end;
344
345 /* First make sure the LLs are valid. */
346 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
347 INIT_LIST_HEAD(balloc_get_order_list(a, i));
348
349 while (bstart < bend) {
350 order = __balloc_max_order_in(a, bstart, bend);
351
352 buddy = balloc_new_buddy(a, NULL, bstart, order);
353 if (!buddy)
354 goto cleanup;
355
356 balloc_blist_add(a, buddy);
357 bstart += balloc_order_to_len(a, order);
358 }
359
360 return 0;
361
362cleanup:
363 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
364 if (!list_empty(balloc_get_order_list(a, i))) {
365 buddy = list_first_entry(balloc_get_order_list(a, i),
366 struct gk20a_buddy, buddy_entry);
367 balloc_blist_rem(a, buddy);
368 kmem_cache_free(buddy_cache, buddy);
369 }
370 }
371
372 return -ENOMEM;
373}
374
375/*
376 * Handle the common init stuff for a gk20a_allocator. 86 * Handle the common init stuff for a gk20a_allocator.
377 */ 87 */
378static int __gk20a_alloc_common_init(struct gk20a_allocator *a, 88int __gk20a_alloc_common_init(struct gk20a_allocator *a,
379 const char *name, void *priv, 89 const char *name, void *priv,
380 const struct gk20a_allocator_ops *ops) 90 const struct gk20a_allocator_ops *ops)
381{ 91{
382 if (!ops) 92 if (!ops)
383 return -EINVAL; 93 return -EINVAL;
@@ -392,909 +102,6 @@ static int __gk20a_alloc_common_init(struct gk20a_allocator *a,
392 return 0; 102 return 0;
393} 103}
394 104
395/*
396 * Initialize a buddy allocator. Returns 0 on success. This allocator does
397 * not necessarily manage bytes. It manages distinct ranges of resources. This
398 * allows the allocator to work for things like comp_tags, semaphores, etc.
399 *
400 * @allocator: Ptr to an allocator struct to init.
401 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
402 * get PTE size for GVA spaces.
403 * @name: Name of the allocator. Doesn't have to be static storage.
404 * @base: The base address of the resource pool being managed.
405 * @size: Number of resources in the pool.
406 * @blk_size: Minimum number of resources to allocate at once. For things like
407 * semaphores this is 1. For GVA this might be as much as 64k. This
408 * corresponds to order 0. Must be power of 2.
409 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
410 * will try and pick a reasonable max order.
411 * @flags: Extra flags necessary. See GPU_BALLOC_*.
412 */
413int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
414 struct vm_gk20a *vm, const char *name,
415 u64 base, u64 size, u64 blk_size,
416 u64 max_order, u64 flags)
417{
418 int err;
419 struct gk20a_buddy_allocator *a;
420
421 /* blk_size must be greater than 0 and a power of 2. */
422 if (blk_size == 0)
423 return -EINVAL;
424 if (blk_size & (blk_size - 1))
425 return -EINVAL;
426
427 if (max_order > GPU_BALLOC_MAX_ORDER)
428 return -EINVAL;
429
430 /* If this is to manage a GVA space we need a VM. */
431 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
432 return -EINVAL;
433
434 a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
435 if (!a)
436 return -ENOMEM;
437
438 err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
439 if (err)
440 goto fail;
441
442 a->base = base;
443 a->length = size;
444 a->blk_size = blk_size;
445 a->blk_shift = __ffs(blk_size);
446 a->owner = __a;
447
448 /*
449 * If base is 0 then modfy base to be the size of one block so that we
450 * can return errors by returning addr == 0.
451 */
452 if (a->base == 0) {
453 a->base = a->blk_size;
454 a->length -= a->blk_size;
455 }
456
457 a->vm = vm;
458 if (flags & GPU_BALLOC_GVA_SPACE)
459 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
460
461 a->flags = flags;
462 a->max_order = max_order;
463
464 balloc_allocator_align(a);
465 balloc_compute_max_order(a);
466
467 /* Shared buddy kmem_cache for all allocators. */
468 if (!buddy_cache)
469 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
470 if (!buddy_cache) {
471 err = -ENOMEM;
472 goto fail;
473 }
474
475 a->alloced_buddies = RB_ROOT;
476 a->fixed_allocs = RB_ROOT;
477 err = balloc_init_lists(a);
478 if (err)
479 goto fail;
480
481 a->inited = 1;
482
483 gk20a_init_alloc_debug(__a);
484 alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base);
485 alloc_dbg(__a, " size 0x%llx\n", a->length);
486 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
487 alloc_dbg(__a, " max_order %llu\n", a->max_order);
488 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
489
490 return 0;
491
492fail:
493 kfree(a);
494 return err;
495}
496
497int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name,
498 u64 base, u64 size, u64 blk_size, u64 flags)
499{
500 return __gk20a_buddy_allocator_init(a, NULL, name,
501 base, size, blk_size, 0, 0);
502}
503
504/*
505 * Clean up and destroy the passed allocator.
506 */
507static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
508{
509 int i;
510 struct rb_node *node;
511 struct gk20a_buddy *bud;
512 struct gk20a_fixed_alloc *falloc;
513 struct gk20a_buddy_allocator *a = __a->priv;
514
515 alloc_lock(__a);
516
517 if (!IS_ERR_OR_NULL(__a->debugfs_entry))
518 debugfs_remove(__a->debugfs_entry);
519
520 /*
521 * Free the fixed allocs first.
522 */
523 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
524 falloc = container_of(node,
525 struct gk20a_fixed_alloc, alloced_entry);
526
527 rb_erase(node, &a->fixed_allocs);
528 __balloc_do_free_fixed(a, falloc);
529 }
530
531 /*
532 * And now free all outstanding allocations.
533 */
534 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
535 bud = container_of(node, struct gk20a_buddy, alloced_entry);
536 balloc_free_buddy(a, bud->start);
537 balloc_blist_add(a, bud);
538 balloc_coalesce(a, bud);
539 }
540
541 /*
542 * Now clean up the unallocated buddies.
543 */
544 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
545 BUG_ON(a->buddy_list_alloced[i] != 0);
546
547 while (!list_empty(balloc_get_order_list(a, i))) {
548 bud = list_first_entry(balloc_get_order_list(a, i),
549 struct gk20a_buddy, buddy_entry);
550 balloc_blist_rem(a, bud);
551 kmem_cache_free(buddy_cache, bud);
552 }
553
554 if (a->buddy_list_len[i] != 0) {
555 pr_info("Excess buddies!!! (%d: %llu)\n",
556 i, a->buddy_list_len[i]);
557 BUG();
558 }
559 if (a->buddy_list_split[i] != 0) {
560 pr_info("Excess split nodes!!! (%d: %llu)\n",
561 i, a->buddy_list_split[i]);
562 BUG();
563 }
564 if (a->buddy_list_alloced[i] != 0) {
565 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
566 i, a->buddy_list_alloced[i]);
567 BUG();
568 }
569 }
570
571 kfree(a);
572
573 alloc_unlock(__a);
574}
575
576/*
577 * Combine the passed buddy if possible. The pointer in @b may not be valid
578 * after this as the buddy may be freed.
579 *
580 * @a must be locked.
581 */
582static void balloc_coalesce(struct gk20a_buddy_allocator *a,
583 struct gk20a_buddy *b)
584{
585 struct gk20a_buddy *parent;
586
587 if (buddy_is_alloced(b) || buddy_is_split(b))
588 return;
589
590 /*
591 * If both our buddy and I are both not allocated and not split then
592 * we can coalesce ourselves.
593 */
594 if (!b->buddy)
595 return;
596 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
597 return;
598
599 parent = b->parent;
600
601 balloc_blist_rem(a, b);
602 balloc_blist_rem(a, b->buddy);
603
604 buddy_clr_split(parent);
605 a->buddy_list_split[parent->order]--;
606 balloc_blist_add(a, parent);
607
608 /*
609 * Recursively coalesce as far as we can go.
610 */
611 balloc_coalesce(a, parent);
612
613 /* Clean up the remains. */
614 kmem_cache_free(buddy_cache, b->buddy);
615 kmem_cache_free(buddy_cache, b);
616}
617
618/*
619 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
620 *
621 * @a must be locked.
622 */
623static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
624 struct gk20a_buddy *b, int pte_size)
625{
626 struct gk20a_buddy *left, *right;
627 u64 half;
628
629 left = balloc_new_buddy(a, b, b->start, b->order - 1);
630 if (!left)
631 return -ENOMEM;
632
633 half = (b->end - b->start) / 2;
634
635 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
636 if (!right) {
637 kmem_cache_free(buddy_cache, left);
638 return -ENOMEM;
639 }
640
641 buddy_set_split(b);
642 a->buddy_list_split[b->order]++;
643
644 b->left = left;
645 b->right = right;
646 left->buddy = right;
647 right->buddy = left;
648 left->parent = b;
649 right->parent = b;
650
651 /* PTE considerations. */
652 if (a->flags & GPU_BALLOC_GVA_SPACE &&
653 left->order <= a->pte_blk_order) {
654 left->pte_size = pte_size;
655 right->pte_size = pte_size;
656 }
657
658 balloc_blist_rem(a, b);
659 balloc_blist_add(a, left);
660 balloc_blist_add(a, right);
661
662 return 0;
663}
664
665/*
666 * Place the passed buddy into the RB tree for allocated buddies. Never fails
667 * unless the passed entry is a duplicate which is a bug.
668 *
669 * @a must be locked.
670 */
671static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
672 struct gk20a_buddy *b)
673{
674 struct rb_node **new = &(a->alloced_buddies.rb_node);
675 struct rb_node *parent = NULL;
676
677 while (*new) {
678 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
679 alloced_entry);
680
681 parent = *new;
682 if (b->start < bud->start)
683 new = &((*new)->rb_left);
684 else if (b->start > bud->start)
685 new = &((*new)->rb_right);
686 else
687 BUG_ON("Duplicate entries in allocated list!\n");
688 }
689
690 rb_link_node(&b->alloced_entry, parent, new);
691 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
692
693 buddy_set_alloced(b);
694 a->buddy_list_alloced[b->order]++;
695}
696
697/*
698 * Remove the passed buddy from the allocated buddy RB tree. Returns the
699 * deallocated buddy for further processing.
700 *
701 * @a must be locked.
702 */
703static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
704 u64 addr)
705{
706 struct rb_node *node = a->alloced_buddies.rb_node;
707 struct gk20a_buddy *bud;
708
709 while (node) {
710 bud = container_of(node, struct gk20a_buddy, alloced_entry);
711
712 if (addr < bud->start)
713 node = node->rb_left;
714 else if (addr > bud->start)
715 node = node->rb_right;
716 else
717 break;
718 }
719
720 if (!node)
721 return NULL;
722
723 rb_erase(node, &a->alloced_buddies);
724 buddy_clr_alloced(bud);
725 a->buddy_list_alloced[bud->order]--;
726
727 return bud;
728}
729
730/*
731 * Find a suitable buddy for the given order and PTE type (big or little).
732 */
733static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
734 u64 order, int pte_size)
735{
736 struct gk20a_buddy *bud;
737
738 if (order > a->max_order ||
739 list_empty(balloc_get_order_list(a, order)))
740 return NULL;
741
742 if (a->flags & GPU_BALLOC_GVA_SPACE &&
743 pte_size == BALLOC_PTE_SIZE_BIG)
744 bud = list_last_entry(balloc_get_order_list(a, order),
745 struct gk20a_buddy, buddy_entry);
746 else
747 bud = list_first_entry(balloc_get_order_list(a, order),
748 struct gk20a_buddy, buddy_entry);
749
750 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
751 bud->pte_size != pte_size)
752 return NULL;
753
754 return bud;
755}
756
757/*
758 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
759 * order buddies until we have a suitable buddy to allocate.
760 *
761 * For PDE grouping add an extra check to see if a buddy is suitable: that the
762 * buddy exists in a PDE who's PTE size is reasonable
763 *
764 * @a must be locked.
765 */
766static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
767 u64 order, int pte_size)
768{
769 u64 split_order;
770 struct gk20a_buddy *bud = NULL;
771
772 split_order = order;
773 while (split_order <= a->max_order &&
774 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
775 split_order++;
776
777 /* Out of memory! */
778 if (!bud)
779 return 0;
780
781 while (bud->order != order) {
782 if (balloc_split_buddy(a, bud, pte_size))
783 return 0; /* No mem... */
784 bud = bud->left;
785 }
786
787 balloc_blist_rem(a, bud);
788 balloc_alloc_buddy(a, bud);
789
790 return bud->start;
791}
792
793/*
794 * Allocate memory from the passed allocator.
795 */
796static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len)
797{
798 u64 order, addr;
799 int pte_size;
800 struct gk20a_buddy_allocator *a = __a->priv;
801
802 gk20a_alloc_trace_func();
803
804 alloc_lock(__a);
805
806 order = balloc_get_order(a, len);
807
808 if (order > a->max_order) {
809 alloc_unlock(__a);
810 alloc_dbg(balloc_owner(a), "Alloc fail\n");
811 gk20a_alloc_trace_func_done();
812 return 0;
813 }
814
815 /*
816 * For now pass the base address of the allocator's region to
817 * __get_pte_size(). This ensures we get the right page size for
818 * the alloc but we don't have to know what the real address is
819 * going to be quite yet.
820 *
821 * TODO: once userspace supports a unified address space pass 0 for
822 * the base. This will make only 'len' affect the PTE size.
823 */
824 if (a->flags & GPU_BALLOC_GVA_SPACE)
825 pte_size = __get_pte_size(a->vm, a->base, len);
826 else
827 pte_size = BALLOC_PTE_SIZE_ANY;
828
829 addr = __balloc_do_alloc(a, order, pte_size);
830
831 if (addr) {
832 a->bytes_alloced += len;
833 a->bytes_alloced_real += balloc_order_to_len(a, order);
834 alloc_dbg(balloc_owner(a),
835 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
836 addr, order, len,
837 pte_size == gmmu_page_size_big ? "big" :
838 pte_size == gmmu_page_size_small ? "small" :
839 "NA/any");
840 } else {
841 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
842 }
843
844 alloc_unlock(__a);
845
846 gk20a_alloc_trace_func_done();
847 return addr;
848}
849
850/*
851 * See if the passed range is actually available for allocation. If so, then
852 * return 1, otherwise return 0.
853 *
854 * TODO: Right now this uses the unoptimal approach of going through all
855 * outstanding allocations and checking their base/ends. This could be better.
856 */
857static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
858 u64 base, u64 end)
859{
860 struct rb_node *node;
861 struct gk20a_buddy *bud;
862
863 node = rb_first(&a->alloced_buddies);
864 if (!node)
865 return 1; /* No allocs yet. */
866
867 bud = container_of(node, struct gk20a_buddy, alloced_entry);
868
869 while (bud->start < end) {
870 if ((bud->start > base && bud->start < end) ||
871 (bud->end > base && bud->end < end))
872 return 0;
873
874 node = rb_next(node);
875 if (!node)
876 break;
877 bud = container_of(node, struct gk20a_buddy, alloced_entry);
878 }
879
880 return 1;
881}
882
883static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
884 struct gk20a_fixed_alloc *f)
885{
886 struct rb_node **new = &(a->fixed_allocs.rb_node);
887 struct rb_node *parent = NULL;
888
889 while (*new) {
890 struct gk20a_fixed_alloc *falloc =
891 container_of(*new, struct gk20a_fixed_alloc,
892 alloced_entry);
893
894 parent = *new;
895 if (f->start < falloc->start)
896 new = &((*new)->rb_left);
897 else if (f->start > falloc->start)
898 new = &((*new)->rb_right);
899 else
900 BUG_ON("Duplicate entries in allocated list!\n");
901 }
902
903 rb_link_node(&f->alloced_entry, parent, new);
904 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
905}
906
907/*
908 * Remove the passed buddy from the allocated buddy RB tree. Returns the
909 * deallocated buddy for further processing.
910 *
911 * @a must be locked.
912 */
913static struct gk20a_fixed_alloc *balloc_free_fixed(
914 struct gk20a_buddy_allocator *a, u64 addr)
915{
916 struct rb_node *node = a->fixed_allocs.rb_node;
917 struct gk20a_fixed_alloc *falloc;
918
919 while (node) {
920 falloc = container_of(node,
921 struct gk20a_fixed_alloc, alloced_entry);
922
923 if (addr < falloc->start)
924 node = node->rb_left;
925 else if (addr > falloc->start)
926 node = node->rb_right;
927 else
928 break;
929 }
930
931 if (!node)
932 return NULL;
933
934 rb_erase(node, &a->fixed_allocs);
935
936 return falloc;
937}
938
939/*
940 * Find the parent range - doesn't necessarily need the parent to actually exist
941 * as a buddy. Finding an existing parent comes later...
942 */
943static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
944 u64 base, u64 order,
945 u64 *pbase, u64 *porder)
946{
947 u64 base_mask;
948 u64 shifted_base = balloc_base_shift(a, base);
949
950 order++;
951 base_mask = ~((a->blk_size << order) - 1);
952
953 shifted_base &= base_mask;
954
955 *pbase = balloc_base_unshift(a, shifted_base);
956 *porder = order;
957}
958
959/*
960 * Makes a buddy at the passed address. This will make all parent buddies
961 * necessary for this buddy to exist as well.
962 */
963static struct gk20a_buddy *__balloc_make_fixed_buddy(
964 struct gk20a_buddy_allocator *a, u64 base, u64 order)
965{
966 struct gk20a_buddy *bud = NULL;
967 struct list_head *order_list;
968 u64 cur_order = order, cur_base = base;
969
970 /*
971 * Algo:
972 * 1. Keep jumping up a buddy order until we find the real buddy that
973 * this buddy exists in.
974 * 2. Then work our way down through the buddy tree until we hit a dead
975 * end.
976 * 3. Start splitting buddies until we split to the one we need to
977 * make.
978 */
979 while (cur_order <= a->max_order) {
980 int found = 0;
981
982 order_list = balloc_get_order_list(a, cur_order);
983 list_for_each_entry(bud, order_list, buddy_entry) {
984 if (bud->start == cur_base) {
985 found = 1;
986 break;
987 }
988 }
989
990 if (found)
991 break;
992
993 __balloc_get_parent_range(a, cur_base, cur_order,
994 &cur_base, &cur_order);
995 }
996
997 if (cur_order > a->max_order) {
998 alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
999 return NULL;
1000 }
1001
1002 /* Split this buddy as necessary until we get the target buddy. */
1003 while (bud->start != base || bud->order != order) {
1004 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
1005 balloc_coalesce(a, bud);
1006 return NULL;
1007 }
1008
1009 if (base < bud->right->start)
1010 bud = bud->left;
1011 else
1012 bud = bud->right;
1013
1014 }
1015
1016 return bud;
1017}
1018
1019static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
1020 struct gk20a_fixed_alloc *falloc,
1021 u64 base, u64 len)
1022{
1023 u64 shifted_base, inc_base;
1024 u64 align_order;
1025
1026 shifted_base = balloc_base_shift(a, base);
1027 if (shifted_base == 0)
1028 align_order = __fls(len >> a->blk_shift);
1029 else
1030 align_order = min_t(u64,
1031 __ffs(shifted_base >> a->blk_shift),
1032 __fls(len >> a->blk_shift));
1033
1034 if (align_order > a->max_order) {
1035 alloc_dbg(balloc_owner(a),
1036 "Align order too big: %llu > %llu\n",
1037 align_order, a->max_order);
1038 return 0;
1039 }
1040
1041 /*
1042 * Generate a list of buddies that satisfy this allocation.
1043 */
1044 inc_base = shifted_base;
1045 while (inc_base < (shifted_base + len)) {
1046 u64 order_len = balloc_order_to_len(a, align_order);
1047 u64 remaining;
1048 struct gk20a_buddy *bud;
1049
1050 bud = __balloc_make_fixed_buddy(a,
1051 balloc_base_unshift(a, inc_base),
1052 align_order);
1053 if (!bud) {
1054 alloc_dbg(balloc_owner(a),
1055 "Fixed buddy failed: {0x%llx, %llu}!\n",
1056 balloc_base_unshift(a, inc_base),
1057 align_order);
1058 goto err_and_cleanup;
1059 }
1060
1061 balloc_blist_rem(a, bud);
1062 balloc_alloc_buddy(a, bud);
1063 __balloc_buddy_list_add(a, bud, &falloc->buddies);
1064
1065 /* Book keeping. */
1066 inc_base += order_len;
1067 remaining = (shifted_base + len) - inc_base;
1068 align_order = __ffs(inc_base >> a->blk_shift);
1069
1070 /* If we don't have much left - trim down align_order. */
1071 if (balloc_order_to_len(a, align_order) > remaining)
1072 align_order = __balloc_max_order_in(a, inc_base,
1073 inc_base + remaining);
1074 }
1075
1076 return base;
1077
1078err_and_cleanup:
1079 while (!list_empty(&falloc->buddies)) {
1080 struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
1081 struct gk20a_buddy,
1082 buddy_entry);
1083
1084 __balloc_buddy_list_rem(a, bud);
1085 balloc_free_buddy(a, bud->start);
1086 kmem_cache_free(buddy_cache, bud);
1087 }
1088
1089 return 0;
1090}
1091
1092/*
1093 * Allocate a fixed address allocation. The address of the allocation is @base
1094 * and the length is @len. This is not a typical buddy allocator operation and
1095 * as such has a high posibility of failure if the address space is heavily in
1096 * use.
1097 *
1098 * Please do not use this function unless _absolutely_ necessary.
1099 */
1100static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
1101 u64 base, u64 len)
1102{
1103 u64 ret, real_bytes = 0;
1104 struct gk20a_buddy *bud;
1105 struct gk20a_fixed_alloc *falloc = NULL;
1106 struct gk20a_buddy_allocator *a = __a->priv;
1107
1108 gk20a_alloc_trace_func();
1109
1110 /* If base isn't aligned to an order 0 block, fail. */
1111 if (base & (a->blk_size - 1))
1112 goto fail;
1113
1114 if (len == 0)
1115 goto fail;
1116
1117 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
1118 if (!falloc)
1119 goto fail;
1120
1121 INIT_LIST_HEAD(&falloc->buddies);
1122 falloc->start = base;
1123 falloc->end = base + len;
1124
1125 alloc_lock(__a);
1126 if (!balloc_is_range_free(a, base, base + len)) {
1127 alloc_dbg(balloc_owner(a),
1128 "Range not free: 0x%llx -> 0x%llx\n",
1129 base, base + len);
1130 goto fail_unlock;
1131 }
1132
1133 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
1134 if (!ret) {
1135 alloc_dbg(balloc_owner(a),
1136 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
1137 base, base + len);
1138 goto fail_unlock;
1139 }
1140
1141 balloc_alloc_fixed(a, falloc);
1142
1143 list_for_each_entry(bud, &falloc->buddies, buddy_entry)
1144 real_bytes += (bud->end - bud->start);
1145
1146 a->bytes_alloced += len;
1147 a->bytes_alloced_real += real_bytes;
1148
1149 alloc_unlock(__a);
1150 alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
1151
1152 gk20a_alloc_trace_func_done();
1153 return base;
1154
1155fail_unlock:
1156 alloc_unlock(__a);
1157fail:
1158 kfree(falloc);
1159 gk20a_alloc_trace_func_done();
1160 return 0;
1161}
1162
1163static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
1164 struct gk20a_fixed_alloc *falloc)
1165{
1166 struct gk20a_buddy *bud;
1167
1168 while (!list_empty(&falloc->buddies)) {
1169 bud = list_first_entry(&falloc->buddies,
1170 struct gk20a_buddy,
1171 buddy_entry);
1172 __balloc_buddy_list_rem(a, bud);
1173
1174 balloc_free_buddy(a, bud->start);
1175 balloc_blist_add(a, bud);
1176 a->bytes_freed += balloc_order_to_len(a, bud->order);
1177
1178 /*
1179 * Attemp to defrag the allocation.
1180 */
1181 balloc_coalesce(a, bud);
1182 }
1183
1184 kfree(falloc);
1185}
1186
1187/*
1188 * Free the passed allocation.
1189 */
1190static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr)
1191{
1192 struct gk20a_buddy *bud;
1193 struct gk20a_fixed_alloc *falloc;
1194 struct gk20a_buddy_allocator *a = __a->priv;
1195
1196 gk20a_alloc_trace_func();
1197
1198 if (!addr) {
1199 gk20a_alloc_trace_func_done();
1200 return;
1201 }
1202
1203 alloc_lock(__a);
1204
1205 /*
1206 * First see if this is a fixed alloc. If not fall back to a regular
1207 * buddy.
1208 */
1209 falloc = balloc_free_fixed(a, addr);
1210 if (falloc) {
1211 __balloc_do_free_fixed(a, falloc);
1212 goto done;
1213 }
1214
1215 bud = balloc_free_buddy(a, addr);
1216 if (!bud)
1217 goto done;
1218
1219 balloc_blist_add(a, bud);
1220 a->bytes_freed += balloc_order_to_len(a, bud->order);
1221
1222 /*
1223 * Attemp to defrag the allocation.
1224 */
1225 balloc_coalesce(a, bud);
1226
1227done:
1228 alloc_unlock(__a);
1229 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
1230 gk20a_alloc_trace_func_done();
1231 return;
1232}
1233
1234/*
1235 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1236 * stats are printed to the kernel log. This lets this code be used for
1237 * debugging purposes internal to the allocator.
1238 */
1239static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
1240 struct seq_file *s, int lock)
1241{
1242 int i;
1243 struct rb_node *node;
1244 struct gk20a_fixed_alloc *falloc;
1245 struct gk20a_buddy_allocator *a = __a->priv;
1246
1247 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1248 a->base, a->length, a->blk_size);
1249 __alloc_pstat(s, __a, "Internal params:\n");
1250 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1251 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1252 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1253 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1254 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1255
1256 __alloc_pstat(s, __a, "Buddy blocks:\n");
1257 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1258 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1259
1260 if (lock)
1261 alloc_lock(__a);
1262 for (i = a->max_order; i >= 0; i--) {
1263 if (a->buddy_list_len[i] == 0 &&
1264 a->buddy_list_alloced[i] == 0 &&
1265 a->buddy_list_split[i] == 0)
1266 continue;
1267
1268 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1269 a->buddy_list_len[i],
1270 a->buddy_list_alloced[i],
1271 a->buddy_list_split[i]);
1272 }
1273
1274 __alloc_pstat(s, __a, "\n");
1275
1276 for (node = rb_first(&a->fixed_allocs), i = 1;
1277 node != NULL;
1278 node = rb_next(node)) {
1279 falloc = container_of(node,
1280 struct gk20a_fixed_alloc, alloced_entry);
1281
1282 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1283 i, falloc->start, falloc->end);
1284 }
1285
1286 __alloc_pstat(s, __a, "\n");
1287 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1288 a->bytes_alloced);
1289 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1290 a->bytes_alloced_real);
1291 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1292 a->bytes_freed);
1293
1294 if (lock)
1295 alloc_unlock(__a);
1296}
1297
1298void gk20a_alloc_print_stats(struct gk20a_allocator *__a, 105void gk20a_alloc_print_stats(struct gk20a_allocator *__a,
1299 struct seq_file *s, int lock) 106 struct seq_file *s, int lock)
1300{ 107{
@@ -1322,7 +129,7 @@ static const struct file_operations __alloc_fops = {
1322 .release = single_release, 129 .release = single_release,
1323}; 130};
1324 131
1325static void gk20a_init_alloc_debug(struct gk20a_allocator *a) 132void gk20a_init_alloc_debug(struct gk20a_allocator *a)
1326{ 133{
1327 if (!gk20a_alloc_debugfs_root) 134 if (!gk20a_alloc_debugfs_root)
1328 return; 135 return;
@@ -1332,7 +139,15 @@ static void gk20a_init_alloc_debug(struct gk20a_allocator *a)
1332 a, &__alloc_fops); 139 a, &__alloc_fops);
1333} 140}
1334 141
1335#ifdef CONFIG_DEBUG_FS 142void gk20a_fini_alloc_debug(struct gk20a_allocator *a)
143{
144 if (!gk20a_alloc_debugfs_root)
145 return;
146
147 if (!IS_ERR_OR_NULL(a->debugfs_entry))
148 debugfs_remove(a->debugfs_entry);
149}
150
1336void gk20a_alloc_debugfs_init(struct platform_device *pdev) 151void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1337{ 152{
1338 struct gk20a_platform *platform = platform_get_drvdata(pdev); 153 struct gk20a_platform *platform = platform_get_drvdata(pdev);
@@ -1345,4 +160,3 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1345 debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, 160 debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root,
1346 &gk20a_alloc_tracing_on); 161 &gk20a_alloc_tracing_on);
1347} 162}
1348#endif