summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2015-03-18 16:33:09 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2015-05-11 11:53:25 -0400
commita2e852364582e9c337f52bc53ccc33877c8f3b47 (patch)
treefb13c5ad80db8eb2424a753a92389c7a3a322a12 /drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
parent0566aee853eb32f4f796499b6b00ddf0f1d7de34 (diff)
gpu: nvgpu: New allocator for VA space
Implement a new buddy allocation scheme for the GPU's VA space. The bitmap allocator was using too much memory and is not a scaleable solution as the GPU's address space keeps getting bigger. The buddy allocation scheme is much more memory efficient when the majority of the address space is not allocated. The buddy allocator is not constrained by the notion of a split address space. The bitmap allocator could only manage either small pages or large pages but not both at the same time. Thus the bottom of the address space was for small pages, the top for large pages. Although, that split is not removed quite yet, the new allocator enables that to happen. The buddy allocator is also very scalable. It manages the relatively small comptag space to the enormous GPU VA space and everything in between. This is important since the GPU has lots of different sized spaces that need managing. Currently there are certain limitations. For one the allocator does not handle the fixed allocations from CUDA very well. It can do so but with certain caveats. The PTE page size is always set to small. This means the BA may place other small page allocations in the buddies around the fixed allocation. It does this to avoid having large and small page allocations in the same PDE. Change-Id: I501cd15af03611536490137331d43761c402c7f9 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/740694 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.c1167
1 files changed, 1102 insertions, 65 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index 0037257c..56fb22df 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * gk20a allocator 2 * gk20a allocator
3 * 3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -16,112 +16,1149 @@
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */ 17 */
18 18
19#include <linux/kernel.h>
20#include <linux/seq_file.h>
21#include <linux/slab.h>
22#include <linux/debugfs.h>
23
24#include "platform_gk20a.h"
19#include "gk20a_allocator.h" 25#include "gk20a_allocator.h"
20#include <linux/vmalloc.h>
21 26
22/* init allocator struct */ 27#include "mm_gk20a.h"
23int gk20a_allocator_init(struct gk20a_allocator *allocator, 28
24 const char *name, u32 start, u32 len) 29static struct dentry *balloc_debugfs_root;
30
31static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
32
33static u32 balloc_tracing_on;
34
35#define balloc_trace_func() \
36 do { \
37 if (balloc_tracing_on) \
38 trace_printk("%s\n", __func__); \
39 } while (0)
40
41#define balloc_trace_func_done() \
42 do { \
43 if (balloc_tracing_on) \
44 trace_printk("%s_done\n", __func__); \
45 } while (0)
46
47
48static void balloc_init_alloc_debug(struct gk20a_allocator *a);
49static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
50 int lock);
51static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
52 u64 addr);
53static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b);
54static void __balloc_do_free_fixed(struct gk20a_allocator *a,
55 struct gk20a_fixed_alloc *falloc);
56
57/*
58 * This function is not present in older kernel's list.h code.
59 */
60#ifndef list_last_entry
61#define list_last_entry(ptr, type, member) \
62 list_entry((ptr)->prev, type, member)
63#endif
64
65/*
66 * GPU buddy allocator for various address spaces.
67 *
68 * Current limitations:
69 * o A fixed allocation could potentially be made that borders PDEs with
70 * different PTE sizes. This would require that fixed buffer to have
71 * different sized PTEs for different parts of the allocation. Probably
72 * best to just require PDE alignment for fixed address allocs.
73 *
74 * o It is currently possible to make an allocator that has a buddy alignment
75 * out of sync with the PDE block size alignment. A simple example is a
76 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
77 * which means each buddy corresponf to more than one actual GPU page. The
78 * best way to fix this is probably just require PDE blocksize alignment
79 * for the start of the address space. At the moment all allocators are
80 * easily PDE aligned so this hasn't been a problem.
81 */
82
83/*
84 * Pick a suitable maximum order for this allocator.
85 *
86 * Hueristic: Just guessing that the best max order is the largest single
87 * block that will fit in the address space.
88 */
89static void balloc_compute_max_order(struct gk20a_allocator *a)
90{
91 u64 true_max_order = ilog2(a->blks);
92
93 if (a->max_order > true_max_order)
94 a->max_order = true_max_order;
95 if (a->max_order > GPU_BALLOC_MAX_ORDER)
96 a->max_order = GPU_BALLOC_MAX_ORDER;
97}
98
99/*
100 * Since we can only allocate in chucks of a->blk_size we need to trim off
101 * any excess data that is not aligned to a->blk_size.
102 */
103static void balloc_allocator_align(struct gk20a_allocator *a)
104{
105 a->start = ALIGN(a->base, a->blk_size);
106 a->end = (a->base + a->length) & ~(a->blk_size - 1);
107 a->count = a->end - a->start;
108 a->blks = a->count >> a->blk_shift;
109}
110
111/*
112 * Pass NULL for parent if you want a top level buddy.
113 */
114static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a,
115 struct gk20a_buddy *parent,
116 u64 start, u64 order)
117{
118 struct gk20a_buddy *new_buddy;
119
120 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
121 if (!new_buddy)
122 return NULL;
123
124 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
125
126 new_buddy->parent = parent;
127 new_buddy->start = start;
128 new_buddy->order = order;
129 new_buddy->end = start + (1 << order) * a->blk_size;
130
131 return new_buddy;
132}
133
134static void __balloc_buddy_list_add(struct gk20a_allocator *a,
135 struct gk20a_buddy *b,
136 struct list_head *list)
137{
138 if (buddy_is_in_list(b)) {
139 balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n",
140 b->order, b->start);
141 BUG();
142 }
143
144 /*
145 * Add big PTE blocks to the tail, small to the head for GVA spaces.
146 * This lets the code that checks if there are available blocks check
147 * without cycling through the entire list.
148 */
149 if (a->flags & GPU_BALLOC_GVA_SPACE &&
150 b->pte_size == BALLOC_PTE_SIZE_BIG)
151 list_add_tail(&b->buddy_entry, list);
152 else
153 list_add(&b->buddy_entry, list);
154
155 buddy_set_in_list(b);
156}
157
158static void __balloc_buddy_list_rem(struct gk20a_allocator *a,
159 struct gk20a_buddy *b)
160{
161 if (!buddy_is_in_list(b)) {
162 balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n",
163 b->order, b->start);
164 BUG();
165 }
166
167 list_del_init(&b->buddy_entry);
168 buddy_clr_in_list(b);
169}
170
171/*
172 * Add a buddy to one of the buddy lists and deal with the necessary
173 * book keeping. Adds the buddy to the list specified by the buddy's order.
174 */
175static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b)
176{
177 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
178 a->buddy_list_len[b->order]++;
179}
180
181static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b)
182{
183 __balloc_buddy_list_rem(a, b);
184 a->buddy_list_len[b->order]--;
185}
186
187static u64 balloc_get_order(struct gk20a_allocator *a, u64 len)
188{
189 if (len == 0)
190 return 0;
191
192 len--;
193 len >>= a->blk_shift;
194
195 return fls(len);
196}
197
198static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end)
199{
200 u64 size = (end - start) >> a->blk_shift;
201
202 if (size > 0)
203 return min_t(u64, ilog2(size), a->max_order);
204 else
205 return GPU_BALLOC_MAX_ORDER;
206}
207
208/*
209 * Initialize the buddy lists.
210 */
211static int balloc_init_lists(struct gk20a_allocator *a)
212{
213 int i;
214 u64 bstart, bend, order;
215 struct gk20a_buddy *buddy;
216
217 bstart = a->start;
218 bend = a->end;
219
220 /* First make sure the LLs are valid. */
221 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
222 INIT_LIST_HEAD(balloc_get_order_list(a, i));
223
224 while (bstart < bend) {
225 order = __balloc_max_order_in(a, bstart, bend);
226
227 buddy = balloc_new_buddy(a, NULL, bstart, order);
228 if (!buddy)
229 goto cleanup;
230
231 balloc_blist_add(a, buddy);
232 bstart += balloc_order_to_len(a, order);
233 }
234
235 return 0;
236
237cleanup:
238 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
239 if (!list_empty(balloc_get_order_list(a, i))) {
240 buddy = list_first_entry(balloc_get_order_list(a, i),
241 struct gk20a_buddy, buddy_entry);
242 balloc_blist_rem(a, buddy);
243 kmem_cache_free(buddy_cache, buddy);
244 }
245 }
246
247 return -ENOMEM;
248}
249
250/*
251 * Initialize a buddy allocator. Returns 0 on success. This allocator does
252 * not necessarily manage bytes. It manages distinct ranges of resources. This
253 * allows the allocator to work for things like comp_tags, semaphores, etc.
254 *
255 * @allocator: Ptr to an allocator struct to init.
256 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
257 * get PTE size for GVA spaces.
258 * @name: Name of the allocator. Doesn't have to be static storage.
259 * @base: The base address of the resource pool being managed.
260 * @size: Number of resources in the pool.
261 * @blk_size: Minimum number of resources to allocate at once. For things like
262 * semaphores this is 1. For GVA this might be as much as 64k. This
263 * corresponds to order 0. Must be power of 2.
264 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
265 * will try and pick a reasonable max order.
266 * @flags: Extra flags necessary. See GPU_BALLOC_*.
267 */
268int __gk20a_allocator_init(struct gk20a_allocator *a,
269 struct vm_gk20a *vm, const char *name,
270 u64 base, u64 size, u64 blk_size, u64 max_order,
271 u64 flags)
25{ 272{
26 memset(allocator, 0, sizeof(struct gk20a_allocator)); 273 int err;
274
275 memset(a, 0, sizeof(struct gk20a_allocator));
276 strncpy(a->name, name, 32);
277
278 a->base = base;
279 a->length = size;
280 a->blk_size = blk_size;
281 a->blk_shift = __ffs(blk_size);
282
283 /* blk_size must be greater than 0 and a power of 2. */
284 if (blk_size == 0)
285 return -EINVAL;
286 if (blk_size & (blk_size - 1))
287 return -EINVAL;
288
289 if (max_order > GPU_BALLOC_MAX_ORDER)
290 return -EINVAL;
291
292 /* If this is to manage a GVA space we need a VM. */
293 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
294 return -EINVAL;
295
296 a->vm = vm;
297 if (flags & GPU_BALLOC_GVA_SPACE)
298 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
27 299
28 strncpy(allocator->name, name, 32); 300 a->flags = flags;
301 a->max_order = max_order;
29 302
30 allocator->base = start; 303 balloc_allocator_align(a);
31 allocator->limit = start + len - 1; 304 balloc_compute_max_order(a);
32 305
33 allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long)); 306 /* Shared buddy kmem_cache for all allocators. */
34 if (!allocator->bitmap) 307 if (!buddy_cache)
308 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
309 if (!buddy_cache)
35 return -ENOMEM; 310 return -ENOMEM;
36 311
37 allocator_dbg(allocator, "%s : base %d, limit %d", 312 a->alloced_buddies = RB_ROOT;
38 allocator->name, allocator->base); 313 err = balloc_init_lists(a);
314 if (err)
315 return err;
39 316
40 init_rwsem(&allocator->rw_sema); 317 mutex_init(&a->lock);
41 318
42 allocator->alloc = gk20a_allocator_block_alloc; 319 a->init = 1;
43 allocator->free = gk20a_allocator_block_free; 320
321 balloc_init_alloc_debug(a);
322 balloc_dbg(a, "New allocator: base 0x%llx\n", a->base);
323 balloc_dbg(a, " size 0x%llx\n", a->length);
324 balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size);
325 balloc_dbg(a, " max_order %llu\n", a->max_order);
326 balloc_dbg(a, " flags 0x%llx\n", a->flags);
44 327
45 return 0; 328 return 0;
46} 329}
47 330
48/* destroy allocator, free all remaining blocks if any */ 331int gk20a_allocator_init(struct gk20a_allocator *a, const char *name,
49void gk20a_allocator_destroy(struct gk20a_allocator *allocator) 332 u64 base, u64 size, u64 blk_size)
333{
334 return __gk20a_allocator_init(a, NULL, name,
335 base, size, blk_size, 0, 0);
336}
337
338/*
339 * Clean up and destroy the passed allocator.
340 */
341void gk20a_allocator_destroy(struct gk20a_allocator *a)
50{ 342{
51 down_write(&allocator->rw_sema); 343 struct rb_node *node;
344 struct gk20a_buddy *bud;
345 struct gk20a_fixed_alloc *falloc;
346 int i;
347
348 balloc_lock(a);
349
350 if (!IS_ERR_OR_NULL(a->debugfs_entry))
351 debugfs_remove(a->debugfs_entry);
352
353 /*
354 * Free the fixed allocs first.
355 */
356 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
357 falloc = container_of(node,
358 struct gk20a_fixed_alloc, alloced_entry);
359
360 __balloc_do_free_fixed(a, falloc);
361 rb_erase(node, &a->fixed_allocs);
362 }
363
364 /*
365 * And now free all outstanding allocations.
366 */
367 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
368 bud = container_of(node, struct gk20a_buddy, alloced_entry);
369 balloc_free_buddy(a, bud->start);
370 balloc_blist_add(a, bud);
371 balloc_coalesce(a, bud);
372 }
52 373
53 vfree(allocator->bitmap); 374 /*
375 * Now clean up the unallocated buddies.
376 */
377 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
378 BUG_ON(a->buddy_list_alloced[i] != 0);
379
380 while (!list_empty(balloc_get_order_list(a, i))) {
381 bud = list_first_entry(balloc_get_order_list(a, i),
382 struct gk20a_buddy, buddy_entry);
383 balloc_blist_rem(a, bud);
384 kmem_cache_free(buddy_cache, bud);
385 }
386
387 if (a->buddy_list_len[i] != 0) {
388 pr_info("Excess buddies!!! (%d: %llu)\n",
389 i, a->buddy_list_len[i]);
390 BUG();
391 }
392 if (a->buddy_list_split[i] != 0) {
393 pr_info("Excess split nodes!!! (%d: %llu)\n",
394 i, a->buddy_list_split[i]);
395 BUG();
396 }
397 if (a->buddy_list_alloced[i] != 0) {
398 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
399 i, a->buddy_list_alloced[i]);
400 BUG();
401 }
402 }
54 403
55 memset(allocator, 0, sizeof(struct gk20a_allocator)); 404 a->init = 0;
405
406 balloc_unlock(a);
407
408 /*
409 * We cant unlock an allocator after memsetting it. That wipes the
410 * state of the mutex. Hopefully no one uses the allocator after
411 * destroying it...
412 */
413 memset(a, 0, sizeof(struct gk20a_allocator));
56} 414}
57 415
58/* 416/*
59 * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is 417 * Combine the passed buddy if possible. The pointer in @b may not be valid
60 * returned to caller in *addr. 418 * after this as the buddy may be freed.
61 * 419 *
62 * contiguous allocation, which allocates one block of 420 * @a must be locked.
63 * contiguous address. 421 */
64*/ 422static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b)
65int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
66 u32 *addr, u32 len, u32 align)
67{ 423{
68 unsigned long _addr; 424 struct gk20a_buddy *parent;
69 425
70 allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); 426 if (buddy_is_alloced(b) || buddy_is_split(b))
427 return;
71 428
72 if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ 429 /*
73 *addr + len > allocator->limit || /* check addr range */ 430 * If both our buddy and I are both not allocated and not split then
74 *addr & (align - 1) || /* check addr alignment */ 431 * we can coalesce ourselves.
75 len == 0) /* check len */ 432 */
76 return -EINVAL; 433 if (!b->buddy)
434 return;
435 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
436 return;
437
438 parent = b->parent;
439
440 balloc_blist_rem(a, b);
441 balloc_blist_rem(a, b->buddy);
442
443 buddy_clr_split(parent);
444 a->buddy_list_split[parent->order]--;
445 balloc_blist_add(a, parent);
446
447 /*
448 * Recursively coalesce as far as we can go.
449 */
450 balloc_coalesce(a, parent);
451
452 /* Clean up the remains. */
453 kmem_cache_free(buddy_cache, b->buddy);
454 kmem_cache_free(buddy_cache, b);
455}
456
457/*
458 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
459 *
460 * @a must be locked.
461 */
462static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b,
463 int pte_size)
464{
465 struct gk20a_buddy *left, *right;
466 u64 half;
77 467
78 len = ALIGN(len, align); 468 left = balloc_new_buddy(a, b, b->start, b->order - 1);
79 if (!len) 469 if (!left)
80 return -ENOMEM; 470 return -ENOMEM;
81 471
82 down_write(&allocator->rw_sema); 472 half = (b->end - b->start) / 2;
83 473
84 _addr = bitmap_find_next_zero_area(allocator->bitmap, 474 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
85 allocator->limit - allocator->base + 1, 475 if (!right) {
86 *addr ? (*addr - allocator->base) : 0, 476 kmem_cache_free(buddy_cache, left);
87 len,
88 align - 1);
89 if ((_addr > allocator->limit - allocator->base + 1) ||
90 (*addr && *addr != (_addr + allocator->base))) {
91 up_write(&allocator->rw_sema);
92 return -ENOMEM; 477 return -ENOMEM;
93 } 478 }
94 479
95 bitmap_set(allocator->bitmap, _addr, len); 480 buddy_set_split(b);
96 *addr = allocator->base + _addr; 481 a->buddy_list_split[b->order]++;
97 482
98 up_write(&allocator->rw_sema); 483 b->left = left;
484 b->right = right;
485 left->buddy = right;
486 right->buddy = left;
487 left->parent = b;
488 right->parent = b;
99 489
100 allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); 490 /* PTE considerations. */
491 if (a->flags & GPU_BALLOC_GVA_SPACE &&
492 left->order <= a->pte_blk_order) {
493 left->pte_size = pte_size;
494 right->pte_size = pte_size;
495 }
496
497 balloc_blist_rem(a, b);
498 balloc_blist_add(a, left);
499 balloc_blist_add(a, right);
101 500
102 return 0; 501 return 0;
103} 502}
104 503
105/* free all blocks between start and end */ 504/*
106int gk20a_allocator_block_free(struct gk20a_allocator *allocator, 505 * Place the passed buddy into the RB tree for allocated buddies. Never fails
107 u32 addr, u32 len, u32 align) 506 * unless the passed entry is a duplicate which is a bug.
507 *
508 * @a must be locked.
509 */
510void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b)
108{ 511{
109 allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); 512 struct rb_node **new = &(a->alloced_buddies.rb_node);
513 struct rb_node *parent = NULL;
110 514
111 if (addr + len > allocator->limit || /* check addr range */ 515 while (*new) {
112 addr < allocator->base || 516 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
113 addr & (align - 1)) /* check addr alignment */ 517 alloced_entry);
114 return -EINVAL;
115 518
116 len = ALIGN(len, align); 519 parent = *new;
117 if (!len) 520 if (b->start < bud->start)
118 return -EINVAL; 521 new = &((*new)->rb_left);
522 else if (b->start > bud->start)
523 new = &((*new)->rb_right);
524 else
525 BUG_ON("Duplicate entries in allocated list!\n");
526 }
527
528 rb_link_node(&b->alloced_entry, parent, new);
529 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
530
531 buddy_set_alloced(b);
532 a->buddy_list_alloced[b->order]++;
533}
534
535/*
536 * Remove the passed buddy from the allocated buddy RB tree. Returns the
537 * deallocated buddy for further processing.
538 *
539 * @a must be locked.
540 */
541static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
542 u64 addr)
543{
544 struct rb_node *node = a->alloced_buddies.rb_node;
545 struct gk20a_buddy *bud;
546
547 while (node) {
548 bud = container_of(node, struct gk20a_buddy, alloced_entry);
549
550 if (addr < bud->start)
551 node = node->rb_left;
552 else if (addr > bud->start)
553 node = node->rb_right;
554 else
555 break;
556 }
557
558 if (!node)
559 return NULL;
560
561 rb_erase(node, &a->alloced_buddies);
562 buddy_clr_alloced(bud);
563 a->buddy_list_alloced[bud->order]--;
564
565 return bud;
566}
567
568/*
569 * Find a suitable buddy for the given order and PTE type (big or little).
570 */
571static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a,
572 u64 order, int pte_size)
573{
574 struct gk20a_buddy *bud;
575
576 if (list_empty(balloc_get_order_list(a, order)))
577 return NULL;
578
579 if (a->flags & GPU_BALLOC_GVA_SPACE &&
580 pte_size == BALLOC_PTE_SIZE_BIG)
581 bud = list_last_entry(balloc_get_order_list(a, order),
582 struct gk20a_buddy, buddy_entry);
583 else
584 bud = list_first_entry(balloc_get_order_list(a, order),
585 struct gk20a_buddy, buddy_entry);
586
587 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
588 bud->pte_size != pte_size)
589 return NULL;
590
591 return bud;
592}
593
594/*
595 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
596 * order buddies until we have a suitable buddy to allocate.
597 *
598 * For PDE grouping add an extra check to see if a buddy is suitable: that the
599 * buddy exists in a PDE who's PTE size is reasonable
600 *
601 * @a must be locked.
602 */
603static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size)
604{
605 u64 split_order;
606 struct gk20a_buddy *bud;
607
608 split_order = order;
609 while (!(bud = __balloc_find_buddy(a, split_order, pte_size)))
610 split_order++;
611
612 while (bud->order != order) {
613 if (balloc_split_buddy(a, bud, pte_size))
614 return 0; /* No mem... */
615 bud = bud->left;
616 }
617
618 balloc_blist_rem(a, bud);
619 balloc_alloc_buddy(a, bud);
119 620
120 down_write(&allocator->rw_sema); 621 return bud->start;
121 bitmap_clear(allocator->bitmap, addr - allocator->base, len); 622}
122 up_write(&allocator->rw_sema); 623
624/*
625 * Allocate memory from the passed allocator.
626 */
627u64 gk20a_balloc(struct gk20a_allocator *a, u64 len)
628{
629 u64 order, addr;
630 int pte_size;
631
632 balloc_trace_func();
633
634 balloc_lock(a);
635
636 order = balloc_get_order(a, len);
637
638 if (order > a->max_order) {
639 balloc_unlock(a);
640 balloc_dbg(a, "Alloc fail\n");
641 balloc_trace_func_done();
642 return 0;
643 }
644
645 /*
646 * For now pass the base address of the allocator's region to
647 * __get_pte_size(). This ensures we get the right page size for
648 * the alloc but we don't have to know what the real address is
649 * going to be quite yet.
650 *
651 * TODO: once userspace supports a unified address space pass 0 for
652 * the base. This will make only 'len' affect the PTE size.
653 */
654 if (a->flags & GPU_BALLOC_GVA_SPACE)
655 pte_size = __get_pte_size(a->vm, a->base, len);
656 else
657 pte_size = BALLOC_PTE_SIZE_ANY;
658
659 addr = __balloc_do_alloc(a, order, pte_size);
660
661 a->bytes_alloced += len;
662 a->bytes_alloced_real += balloc_order_to_len(a, order);
663
664 balloc_unlock(a);
665 balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
666 addr, order, len,
667 pte_size == gmmu_page_size_big ? "big" :
668 pte_size == gmmu_page_size_small ? "small" :
669 "NA/any");
670
671 balloc_trace_func_done();
672 return addr;
673}
674
675/*
676 * See if the passed range is actually available for allocation. If so, then
677 * return 1, otherwise return 0.
678 *
679 * TODO: Right now this uses the unoptimal approach of going through all
680 * outstanding allocations and checking their base/ends. This could be better.
681 */
682static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end)
683{
684 struct rb_node *node;
685 struct gk20a_buddy *bud;
686
687 node = rb_first(&a->alloced_buddies);
688 if (!node)
689 return 1; /* No allocs yet. */
690
691 bud = container_of(node, struct gk20a_buddy, alloced_entry);
692
693 while (bud->start < end) {
694 if ((bud->start > base && bud->start < end) ||
695 (bud->end > base && bud->end < end))
696 return 0;
697
698 node = rb_next(node);
699 if (!node)
700 break;
701 bud = container_of(node, struct gk20a_buddy, alloced_entry);
702 }
703
704 return 1;
705}
706
707static void balloc_alloc_fixed(struct gk20a_allocator *a,
708 struct gk20a_fixed_alloc *f)
709{
710 struct rb_node **new = &(a->fixed_allocs.rb_node);
711 struct rb_node *parent = NULL;
712
713 while (*new) {
714 struct gk20a_fixed_alloc *falloc =
715 container_of(*new, struct gk20a_fixed_alloc,
716 alloced_entry);
717
718 parent = *new;
719 if (f->start < falloc->start)
720 new = &((*new)->rb_left);
721 else if (f->start > falloc->start)
722 new = &((*new)->rb_right);
723 else
724 BUG_ON("Duplicate entries in allocated list!\n");
725 }
726
727 rb_link_node(&f->alloced_entry, parent, new);
728 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
729}
730
731/*
732 * Remove the passed buddy from the allocated buddy RB tree. Returns the
733 * deallocated buddy for further processing.
734 *
735 * @a must be locked.
736 */
737static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a,
738 u64 addr)
739{
740 struct rb_node *node = a->fixed_allocs.rb_node;
741 struct gk20a_fixed_alloc *falloc;
742
743 while (node) {
744 falloc = container_of(node,
745 struct gk20a_fixed_alloc, alloced_entry);
746
747 if (addr < falloc->start)
748 node = node->rb_left;
749 else if (addr > falloc->start)
750 node = node->rb_right;
751 else
752 break;
753 }
754
755 if (!node)
756 return NULL;
757
758 rb_erase(node, &a->fixed_allocs);
759
760 return falloc;
761}
762
763/*
764 * Find the parent range - doesn't necessarily need the parent to actually exist
765 * as a buddy. Finding an existing parent comes later...
766 */
767static void __balloc_get_parent_range(struct gk20a_allocator *a,
768 u64 base, u64 order,
769 u64 *pbase, u64 *porder)
770{
771 u64 base_mask;
772 u64 shifted_base = balloc_base_shift(a, base);
773
774 order++;
775 base_mask = ~((a->blk_size << order) - 1);
776
777 shifted_base &= base_mask;
778
779 *pbase = balloc_base_unshift(a, shifted_base);
780 *porder = order;
781}
782
783/*
784 * Makes a buddy at the passed address. This will make all parent buddies
785 * necessary for this buddy to exist as well.
786 */
787static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a,
788 u64 base, u64 order)
789{
790 struct gk20a_buddy *bud = NULL;
791 struct list_head *order_list;
792 u64 cur_order = order, cur_base = base;
793
794 /*
795 * Algo:
796 * 1. Keep jumping up a buddy order until we find the real buddy that
797 * this buddy exists in.
798 * 2. Then work our way down through the buddy tree until we hit a dead
799 * end.
800 * 3. Start splitting buddies until we split to the one we need to
801 * make.
802 */
803 while (cur_order <= a->max_order) {
804 int found = 0;
805
806 order_list = balloc_get_order_list(a, cur_order);
807 list_for_each_entry(bud, order_list, buddy_entry) {
808 if (bud->start == cur_base) {
809 found = 1;
810 break;
811 }
812 }
813
814 if (found)
815 break;
816
817 __balloc_get_parent_range(a, cur_base, cur_order,
818 &cur_base, &cur_order);
819 }
820
821 if (cur_order > a->max_order) {
822 balloc_dbg(a, "No buddy for range ???\n");
823 return NULL;
824 }
825
826 /* Split this buddy as necessary until we get the target buddy. */
827 while (bud->start != base || bud->order != order) {
828 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
829 balloc_coalesce(a, bud);
830 return NULL;
831 }
832
833 if (base < bud->right->start)
834 bud = bud->left;
835 else
836 bud = bud->right;
837
838 }
839
840 return bud;
841}
842
843static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a,
844 struct gk20a_fixed_alloc *falloc,
845 u64 base, u64 len)
846{
847 u64 shifted_base, inc_base;
848 u64 align_order;
849
850 shifted_base = balloc_base_shift(a, base);
851 if (shifted_base == 0)
852 align_order = __fls(len >> a->blk_shift);
853 else
854 align_order = min_t(u64,
855 __ffs(shifted_base >> a->blk_shift),
856 __fls(len >> a->blk_shift));
857
858 if (align_order > a->max_order) {
859 balloc_dbg(a, "Align order too big: %llu > %llu\n",
860 align_order, a->max_order);
861 return 0;
862 }
863
864 /*
865 * Generate a list of buddies that satisfy this allocation.
866 */
867 inc_base = shifted_base;
868 while (inc_base < (shifted_base + len)) {
869 u64 order_len = balloc_order_to_len(a, align_order);
870 u64 remaining;
871 struct gk20a_buddy *bud;
872
873 bud = __balloc_make_fixed_buddy(a,
874 balloc_base_unshift(a, inc_base),
875 align_order);
876 if (!bud) {
877 balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n",
878 balloc_base_unshift(a, inc_base),
879 align_order);
880 goto err_and_cleanup;
881 }
882
883 balloc_blist_rem(a, bud);
884 balloc_alloc_buddy(a, bud);
885 __balloc_buddy_list_add(a, bud, &falloc->buddies);
886
887 /* Book keeping. */
888 inc_base += order_len;
889 remaining = (shifted_base + len) - inc_base;
890 align_order = __ffs(inc_base >> a->blk_shift);
891
892 /* If we don't have much left - trim down align_order. */
893 if (balloc_order_to_len(a, align_order) > remaining)
894 align_order = __balloc_max_order_in(a, inc_base,
895 inc_base + remaining);
896 }
897
898 return base;
123 899
124 allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); 900err_and_cleanup:
901 while (!list_empty(&falloc->buddies)) {
902 struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
903 struct gk20a_buddy,
904 buddy_entry);
905
906 __balloc_buddy_list_rem(a, bud);
907 balloc_free_buddy(a, bud->start);
908 kmem_cache_free(buddy_cache, bud);
909 }
910
911 return 0;
912}
913
914/*
915 * Allocate a fixed address allocation. The address of the allocation is @base
916 * and the length is @len. This is not a typical buddy allocator operation and
917 * as such has a high posibility of failure if the address space is heavily in
918 * use.
919 *
920 * Please do not use this function unless _absolutely_ necessary.
921 */
922u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
923{
924 struct gk20a_fixed_alloc *falloc = NULL;
925 struct gk20a_buddy *bud;
926 u64 ret, real_bytes = 0;
927
928 balloc_trace_func();
929
930 /* If base isn't aligned to an order 0 block, fail. */
931 if (base & (a->blk_size - 1))
932 goto fail;
933
934 if (len == 0)
935 goto fail;
936
937 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
938 if (!falloc)
939 goto fail;
940
941 INIT_LIST_HEAD(&falloc->buddies);
942 falloc->start = base;
943 falloc->end = base + len;
944
945 balloc_lock(a);
946 if (!balloc_is_range_free(a, base, base + len)) {
947 balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n",
948 base, base + len);
949 goto fail_unlock;
950 }
951
952 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
953 if (!ret) {
954 balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
955 base, base + len);
956 goto fail_unlock;
957 }
958
959 balloc_alloc_fixed(a, falloc);
960
961 list_for_each_entry(bud, &falloc->buddies, buddy_entry)
962 real_bytes += (bud->end - bud->start);
963
964 a->bytes_alloced += len;
965 a->bytes_alloced_real += real_bytes;
966
967 balloc_unlock(a);
968 balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base);
969
970 balloc_trace_func_done();
971 return base;
972
973fail_unlock:
974 balloc_unlock(a);
975fail:
976 kfree(falloc);
977 balloc_trace_func_done();
978 return 0;
979}
980
981static void __balloc_do_free_fixed(struct gk20a_allocator *a,
982 struct gk20a_fixed_alloc *falloc)
983{
984 struct gk20a_buddy *bud;
985
986 while (!list_empty(&falloc->buddies)) {
987 bud = list_first_entry(&falloc->buddies,
988 struct gk20a_buddy,
989 buddy_entry);
990 __balloc_buddy_list_rem(a, bud);
991
992 balloc_free_buddy(a, bud->start);
993 balloc_blist_add(a, bud);
994 a->bytes_freed += balloc_order_to_len(a, bud->order);
995
996 /*
997 * Attemp to defrag the allocation.
998 */
999 balloc_coalesce(a, bud);
1000 }
1001
1002 kfree(falloc);
1003}
1004
1005/*
1006 * Free the passed allocation.
1007 */
1008void gk20a_bfree(struct gk20a_allocator *a, u64 addr)
1009{
1010 struct gk20a_buddy *bud;
1011 struct gk20a_fixed_alloc *falloc;
1012
1013 balloc_trace_func();
1014
1015 if (!addr) {
1016 balloc_trace_func_done();
1017 return;
1018 }
1019
1020 balloc_lock(a);
1021
1022 /*
1023 * First see if this is a fixed alloc. If not fall back to a regular
1024 * buddy.
1025 */
1026 falloc = balloc_free_fixed(a, addr);
1027 if (falloc) {
1028 __balloc_do_free_fixed(a, falloc);
1029 goto done;
1030 }
1031
1032 bud = balloc_free_buddy(a, addr);
1033 if (!bud)
1034 goto done;
1035
1036 balloc_blist_add(a, bud);
1037 a->bytes_freed += balloc_order_to_len(a, bud->order);
1038
1039 /*
1040 * Attemp to defrag the allocation.
1041 */
1042 balloc_coalesce(a, bud);
1043
1044done:
1045 balloc_unlock(a);
1046 balloc_dbg(a, "Free 0x%llx\n", addr);
1047 balloc_trace_func_done();
1048 return;
1049}
1050
1051/*
1052 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1053 * stats are printed to the kernel log. This lets this code be used for
1054 * debugging purposes internal to the allocator.
1055 */
1056static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
1057 int lock)
1058{
1059#define __balloc_pstat(s, fmt, arg...) \
1060 do { \
1061 if (s) \
1062 seq_printf(s, fmt, ##arg); \
1063 else \
1064 balloc_dbg(a, fmt, ##arg); \
1065 } while (0)
1066
1067 int i;
1068 struct rb_node *node;
1069 struct gk20a_fixed_alloc *falloc;
1070
1071 __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n",
1072 a->base, a->length, a->blk_size);
1073 __balloc_pstat(s, "Internal params:\n");
1074 __balloc_pstat(s, " start = %llu\n", a->start);
1075 __balloc_pstat(s, " end = %llu\n", a->end);
1076 __balloc_pstat(s, " count = %llu\n", a->count);
1077 __balloc_pstat(s, " blks = %llu\n", a->blks);
1078 __balloc_pstat(s, " max_order = %llu\n", a->max_order);
1079
1080 __balloc_pstat(s, "Buddy blocks:\n");
1081 __balloc_pstat(s, " Order Free Alloced Split\n");
1082 __balloc_pstat(s, " ----- ---- ------- -----\n");
1083
1084 if (lock)
1085 balloc_lock(a);
1086 for (i = a->max_order; i >= 0; i--) {
1087 if (a->buddy_list_len[i] == 0 &&
1088 a->buddy_list_alloced[i] == 0 &&
1089 a->buddy_list_split[i] == 0)
1090 continue;
1091
1092 __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i,
1093 a->buddy_list_len[i],
1094 a->buddy_list_alloced[i],
1095 a->buddy_list_split[i]);
1096 }
1097
1098 __balloc_pstat(s, "\n");
1099
1100 for (node = rb_first(&a->fixed_allocs), i = 1;
1101 node != NULL;
1102 node = rb_next(node)) {
1103 falloc = container_of(node,
1104 struct gk20a_fixed_alloc, alloced_entry);
1105
1106 __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1107 i, falloc->start, falloc->end);
1108 }
1109
1110 __balloc_pstat(s, "\n");
1111 __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced);
1112 __balloc_pstat(s, "Bytes allocated (real): %llu\n",
1113 a->bytes_alloced_real);
1114 __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed);
1115
1116 if (lock)
1117 balloc_unlock(a);
1118
1119#undef __balloc_pstats
1120}
1121
1122static int __alloc_show(struct seq_file *s, void *unused)
1123{
1124 struct gk20a_allocator *a = s->private;
1125
1126 balloc_print_stats(a, s, 1);
125 1127
126 return 0; 1128 return 0;
127} 1129}
1130
/*
 * debugfs open callback: bind the seq_file single_open machinery to
 * __alloc_show, passing the allocator stashed in i_private through.
 */
static int __alloc_open(struct inode *inode, struct file *file)
{
	return single_open(file, __alloc_show, inode->i_private);
}
1135
/* File operations for a per-allocator debugfs stats file (read-only). */
static const struct file_operations __alloc_fops = {
	.open = __alloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1142
1143static void balloc_init_alloc_debug(struct gk20a_allocator *a)
1144{
1145 if (!balloc_debugfs_root)
1146 return;
1147
1148 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
1149 balloc_debugfs_root,
1150 a, &__alloc_fops);
1151}
1152
1153void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1154{
1155 struct gk20a_platform *platform = platform_get_drvdata(pdev);
1156 struct dentry *gpu_root = platform->debugfs;
1157
1158 balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root);
1159 if (IS_ERR_OR_NULL(balloc_debugfs_root))
1160 return;
1161
1162 debugfs_create_u32("tracing", 0664, balloc_debugfs_root,
1163 &balloc_tracing_on);
1164}