summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2016-06-27 20:46:02 -0400
committerAlex Waterman <alexw@nvidia.com>2016-07-19 14:30:45 -0400
commit5672cbdf6d8e7b8b93a08cd388097e2d1f0a8843 (patch)
treec00d0cc5c7f46ffe39c14bfdb6585716cd071bde /drivers
parentb6569319c772d84087a0a1a6d7146bdcae8e9aab (diff)
gpu: nvgpu: Move buddy allocator to new file
Move the buddy allocator implementation to a new file to make the code more organized. Also, as part of this, commonize some macros and functions which will be used by future allocator implementations. Bug 1781897 Change-Id: I1611534d5d872bf3b4677f7a1cc024a94b1c437e Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1172116 Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/Makefile1
-rw-r--r--drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h190
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.c1216
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.h200
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c1187
5 files changed, 1423 insertions, 1371 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 0fdd2e28..a9a0f92f 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -53,6 +53,7 @@ nvgpu-y := \
53 gk20a/hal.o \ 53 gk20a/hal.o \
54 gk20a/hal_gk20a.o \ 54 gk20a/hal_gk20a.o \
55 gk20a/gk20a_allocator.o \ 55 gk20a/gk20a_allocator.o \
56 gk20a/gk20a_allocator_buddy.o \
56 gk20a/cde_gk20a.o \ 57 gk20a/cde_gk20a.o \
57 gk20a/platform_gk20a_generic.o \ 58 gk20a/platform_gk20a_generic.o \
58 gk20a/tsg_gk20a.o \ 59 gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
new file mode 100644
index 00000000..bed0492c
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h
@@ -0,0 +1,190 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef BUDDY_ALLOCATOR_PRIV_H
18#define BUDDY_ALLOCATOR_PRIV_H
19
20#include <linux/list.h>
21#include <linux/rbtree.h>
22
23struct gk20a_allocator;
24struct vm_gk20a;
25
26/*
27 * Each buddy is an element in a binary tree.
28 */
29struct gk20a_buddy {
30 struct gk20a_buddy *parent; /* Parent node. */
31 struct gk20a_buddy *buddy; /* This node's buddy. */
32 struct gk20a_buddy *left; /* Lower address sub-node. */
33 struct gk20a_buddy *right; /* Higher address sub-node. */
34
35 struct list_head buddy_entry; /* List entry for various lists. */
36 struct rb_node alloced_entry; /* RB tree of allocations. */
37
38 u64 start; /* Start address of this buddy. */
39 u64 end; /* End address of this buddy. */
40 u64 order; /* Buddy order. */
41
42#define BALLOC_BUDDY_ALLOCED 0x1
43#define BALLOC_BUDDY_SPLIT 0x2
44#define BALLOC_BUDDY_IN_LIST 0x4
45 int flags; /* List of associated flags. */
46
47 /*
48 * Size of the PDE this buddy is using. This allows for grouping like
49 * sized allocations into the same PDE.
50 */
51#define BALLOC_PTE_SIZE_ANY 0x0
52#define BALLOC_PTE_SIZE_SMALL 0x1
53#define BALLOC_PTE_SIZE_BIG 0x2
54 int pte_size;
55};
56
57#define __buddy_flag_ops(flag, flag_up) \
58 static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
59 { \
60 return b->flags & BALLOC_BUDDY_ ## flag_up; \
61 } \
62 static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
63 { \
64 b->flags |= BALLOC_BUDDY_ ## flag_up; \
65 } \
66 static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
67 { \
68 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
69 }
70
71/*
72 * int buddy_is_alloced(struct gk20a_buddy *b);
73 * void buddy_set_alloced(struct gk20a_buddy *b);
74 * void buddy_clr_alloced(struct gk20a_buddy *b);
75 *
76 * int buddy_is_split(struct gk20a_buddy *b);
77 * void buddy_set_split(struct gk20a_buddy *b);
78 * void buddy_clr_split(struct gk20a_buddy *b);
79 *
80 * int buddy_is_in_list(struct gk20a_buddy *b);
81 * void buddy_set_in_list(struct gk20a_buddy *b);
82 * void buddy_clr_in_list(struct gk20a_buddy *b);
83 */
84__buddy_flag_ops(alloced, ALLOCED);
85__buddy_flag_ops(split, SPLIT);
86__buddy_flag_ops(in_list, IN_LIST);
87
88/*
89 * Keeps info for a fixed allocation.
90 */
91struct gk20a_fixed_alloc {
92 struct list_head buddies; /* List of buddies. */
93 struct rb_node alloced_entry; /* RB tree of fixed allocations. */
94
95 u64 start; /* Start of fixed block. */
96 u64 end; /* End address. */
97};
98
99/*
100 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
101 * doesn't have to correspond to a byte. In some cases each unit is a more
102 * complex object such as a comp_tag line or the like.
103 *
104 * The max order is computed based on the size of the minimum order and the size
105 * of the address space.
106 *
107 * order_size is the size of an order 0 buddy.
108 */
109struct gk20a_buddy_allocator {
110 struct gk20a_allocator *owner; /* Owner of this buddy allocator. */
111 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
112
113 u64 base; /* Base address of the space. */
114 u64 length; /* Length of the space. */
115 u64 blk_size; /* Size of order 0 allocation. */
116 u64 blk_shift; /* Shift to divide by blk_size. */
117
118 /* Internal stuff. */
119 u64 start; /* Real start (aligned to blk_size). */
120 u64 end; /* Real end, trimmed if needed. */
121 u64 count; /* Count of objects in space. */
122 u64 blks; /* Count of blks in the space. */
123 u64 max_order; /* Specific maximum order. */
124
125 struct rb_root alloced_buddies; /* Outstanding allocations. */
126 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
127
128 /*
129 * Impose an upper bound on the maximum order.
130 */
131#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
132
133 struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
134 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
135 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
136 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
137
138 /*
139 * This is for when the allocator is managing a GVA space (the
140 * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires
141 * that we group like sized allocations into PDE blocks.
142 */
143 u64 pte_blk_order;
144
145 int initialized;
146
147 u64 flags;
148
149 u64 bytes_alloced;
150 u64 bytes_alloced_real;
151 u64 bytes_freed;
152};
153
154static inline struct gk20a_buddy_allocator *buddy_allocator(
155 struct gk20a_allocator *a)
156{
157 return (struct gk20a_buddy_allocator *)(a)->priv;
158}
159
160static inline struct list_head *balloc_get_order_list(
161 struct gk20a_buddy_allocator *a, int order)
162{
163 return &a->buddy_list[order];
164}
165
166static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a,
167 int order)
168{
169 return (1 << order) * a->blk_size;
170}
171
172static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a,
173 u64 base)
174{
175 return base - a->start;
176}
177
178static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a,
179 u64 base)
180{
181 return base + a->start;
182}
183
184static inline struct gk20a_allocator *balloc_owner(
185 struct gk20a_buddy_allocator *a)
186{
187 return a->owner;
188}
189
190#endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index f2164768..b7e9a5e4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -19,125 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21 21
22#include "mm_gk20a.h"
22#include "platform_gk20a.h" 23#include "platform_gk20a.h"
23#include "gk20a_allocator.h" 24#include "gk20a_allocator.h"
24 25
25#include "mm_gk20a.h"
26
27static struct dentry *gk20a_alloc_debugfs_root;
28
29static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
30
31u32 gk20a_alloc_tracing_on; 26u32 gk20a_alloc_tracing_on;
32 27
33#define gk20a_alloc_trace_func() \ 28static struct dentry *gk20a_alloc_debugfs_root;
34 do { \
35 if (gk20a_alloc_tracing_on) \
36 trace_printk("%s\n", __func__); \
37 } while (0)
38
39#define gk20a_alloc_trace_func_done() \
40 do { \
41 if (gk20a_alloc_tracing_on) \
42 trace_printk("%s_done\n", __func__); \
43 } while (0)
44
45/*
46 * Buddy allocator implementation.
47 */
48static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len);
49static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr);
50static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
51 u64 base, u64 len);
52static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a);
53static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a);
54static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a);
55static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a);
56
57static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a);
58static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
59 struct seq_file *s, int lock);
60
61/* Some other buddy allocator functions. */
62static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
63 u64 addr);
64static void balloc_coalesce(struct gk20a_buddy_allocator *a,
65 struct gk20a_buddy *b);
66static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
67 struct gk20a_fixed_alloc *falloc);
68
69/* Debugging. */
70static void gk20a_init_alloc_debug(struct gk20a_allocator *a);
71
72/*
73 * This function is not present in older kernel's list.h code.
74 */
75#ifndef list_last_entry
76#define list_last_entry(ptr, type, member) \
77 list_entry((ptr)->prev, type, member)
78#endif
79
80static const struct gk20a_allocator_ops buddy_ops = {
81 .alloc = gk20a_buddy_alloc,
82 .free = gk20a_buddy_free,
83
84 .alloc_fixed = gk20a_buddy_alloc_fixed,
85 /* .free_fixed not needed. */
86
87 .base = gk20a_buddy_alloc_base,
88 .length = gk20a_buddy_alloc_length,
89 .end = gk20a_buddy_alloc_end,
90 .inited = gk20a_buddy_alloc_inited,
91
92 .fini = gk20a_buddy_allocator_destroy,
93
94 .print_stats = gk20a_buddy_print_stats,
95};
96
97/*
98 * GPU buddy allocator for various address spaces.
99 *
100 * Current limitations:
101 * o A fixed allocation could potentially be made that borders PDEs with
102 * different PTE sizes. This would require that fixed buffer to have
103 * different sized PTEs for different parts of the allocation. Probably
104 * best to just require PDE alignment for fixed address allocs.
105 *
106 * o It is currently possible to make an allocator that has a buddy alignment
107 * out of sync with the PDE block size alignment. A simple example is a
108 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
 109 * which means each buddy corresponds to more than one actual GPU page. The
110 * best way to fix this is probably just require PDE blocksize alignment
111 * for the start of the address space. At the moment all allocators are
112 * easily PDE aligned so this hasn't been a problem.
113 */
114
115static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
116{
117 struct gk20a_buddy_allocator *ba = a->priv;
118
119 return ba->length;
120}
121
122static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
123{
124 struct gk20a_buddy_allocator *ba = a->priv;
125
126 return ba->start;
127}
128
129static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
130{
131 struct gk20a_buddy_allocator *ba = a->priv;
132
133 return ba->inited;
134}
135static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
136{
137 struct gk20a_buddy_allocator *ba = a->priv;
138
139 return ba->end;
140}
141 29
142u64 gk20a_alloc_length(struct gk20a_allocator *a) 30u64 gk20a_alloc_length(struct gk20a_allocator *a)
143{ 31{
@@ -195,189 +83,11 @@ void gk20a_alloc_destroy(struct gk20a_allocator *a)
195} 83}
196 84
197/* 85/*
198 * Pick a suitable maximum order for this allocator.
199 *
200 * Hueristic: Just guessing that the best max order is the largest single
201 * block that will fit in the address space.
202 */
203static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
204{
205 u64 true_max_order = ilog2(a->blks);
206
207 if (a->max_order == 0) {
208 a->max_order = true_max_order;
209 return;
210 }
211
212 if (a->max_order > true_max_order)
213 a->max_order = true_max_order;
214 if (a->max_order > GPU_BALLOC_MAX_ORDER)
215 a->max_order = GPU_BALLOC_MAX_ORDER;
216}
217
218/*
219 * Since we can only allocate in chucks of a->blk_size we need to trim off
220 * any excess data that is not aligned to a->blk_size.
221 */
222static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
223{
224 a->start = ALIGN(a->base, a->blk_size);
225 WARN_ON(a->start != a->base);
226 a->end = (a->base + a->length) & ~(a->blk_size - 1);
227 a->count = a->end - a->start;
228 a->blks = a->count >> a->blk_shift;
229}
230
231/*
232 * Pass NULL for parent if you want a top level buddy.
233 */
234static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
235 struct gk20a_buddy *parent,
236 u64 start, u64 order)
237{
238 struct gk20a_buddy *new_buddy;
239
240 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
241 if (!new_buddy)
242 return NULL;
243
244 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
245
246 new_buddy->parent = parent;
247 new_buddy->start = start;
248 new_buddy->order = order;
249 new_buddy->end = start + (1 << order) * a->blk_size;
250
251 return new_buddy;
252}
253
254static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
255 struct gk20a_buddy *b,
256 struct list_head *list)
257{
258 if (buddy_is_in_list(b)) {
259 alloc_dbg(balloc_owner(a),
260 "Oops: adding added buddy (%llu:0x%llx)\n",
261 b->order, b->start);
262 BUG();
263 }
264
265 /*
266 * Add big PTE blocks to the tail, small to the head for GVA spaces.
267 * This lets the code that checks if there are available blocks check
268 * without cycling through the entire list.
269 */
270 if (a->flags & GPU_BALLOC_GVA_SPACE &&
271 b->pte_size == BALLOC_PTE_SIZE_BIG)
272 list_add_tail(&b->buddy_entry, list);
273 else
274 list_add(&b->buddy_entry, list);
275
276 buddy_set_in_list(b);
277}
278
279static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
280 struct gk20a_buddy *b)
281{
282 if (!buddy_is_in_list(b)) {
283 alloc_dbg(balloc_owner(a),
284 "Oops: removing removed buddy (%llu:0x%llx)\n",
285 b->order, b->start);
286 BUG();
287 }
288
289 list_del_init(&b->buddy_entry);
290 buddy_clr_in_list(b);
291}
292
293/*
294 * Add a buddy to one of the buddy lists and deal with the necessary
295 * book keeping. Adds the buddy to the list specified by the buddy's order.
296 */
297static void balloc_blist_add(struct gk20a_buddy_allocator *a,
298 struct gk20a_buddy *b)
299{
300 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
301 a->buddy_list_len[b->order]++;
302}
303
304static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
305 struct gk20a_buddy *b)
306{
307 __balloc_buddy_list_rem(a, b);
308 a->buddy_list_len[b->order]--;
309}
310
311static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
312{
313 if (len == 0)
314 return 0;
315
316 len--;
317 len >>= a->blk_shift;
318
319 return fls(len);
320}
321
322static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
323 u64 start, u64 end)
324{
325 u64 size = (end - start) >> a->blk_shift;
326
327 if (size > 0)
328 return min_t(u64, ilog2(size), a->max_order);
329 else
330 return GPU_BALLOC_MAX_ORDER;
331}
332
333/*
334 * Initialize the buddy lists.
335 */
336static int balloc_init_lists(struct gk20a_buddy_allocator *a)
337{
338 int i;
339 u64 bstart, bend, order;
340 struct gk20a_buddy *buddy;
341
342 bstart = a->start;
343 bend = a->end;
344
345 /* First make sure the LLs are valid. */
346 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
347 INIT_LIST_HEAD(balloc_get_order_list(a, i));
348
349 while (bstart < bend) {
350 order = __balloc_max_order_in(a, bstart, bend);
351
352 buddy = balloc_new_buddy(a, NULL, bstart, order);
353 if (!buddy)
354 goto cleanup;
355
356 balloc_blist_add(a, buddy);
357 bstart += balloc_order_to_len(a, order);
358 }
359
360 return 0;
361
362cleanup:
363 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
364 if (!list_empty(balloc_get_order_list(a, i))) {
365 buddy = list_first_entry(balloc_get_order_list(a, i),
366 struct gk20a_buddy, buddy_entry);
367 balloc_blist_rem(a, buddy);
368 kmem_cache_free(buddy_cache, buddy);
369 }
370 }
371
372 return -ENOMEM;
373}
374
375/*
376 * Handle the common init stuff for a gk20a_allocator. 86 * Handle the common init stuff for a gk20a_allocator.
377 */ 87 */
378static int __gk20a_alloc_common_init(struct gk20a_allocator *a, 88int __gk20a_alloc_common_init(struct gk20a_allocator *a,
379 const char *name, void *priv, 89 const char *name, void *priv,
380 const struct gk20a_allocator_ops *ops) 90 const struct gk20a_allocator_ops *ops)
381{ 91{
382 if (!ops) 92 if (!ops)
383 return -EINVAL; 93 return -EINVAL;
@@ -392,909 +102,6 @@ static int __gk20a_alloc_common_init(struct gk20a_allocator *a,
392 return 0; 102 return 0;
393} 103}
394 104
395/*
396 * Initialize a buddy allocator. Returns 0 on success. This allocator does
397 * not necessarily manage bytes. It manages distinct ranges of resources. This
398 * allows the allocator to work for things like comp_tags, semaphores, etc.
399 *
400 * @allocator: Ptr to an allocator struct to init.
401 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
402 * get PTE size for GVA spaces.
403 * @name: Name of the allocator. Doesn't have to be static storage.
404 * @base: The base address of the resource pool being managed.
405 * @size: Number of resources in the pool.
406 * @blk_size: Minimum number of resources to allocate at once. For things like
407 * semaphores this is 1. For GVA this might be as much as 64k. This
408 * corresponds to order 0. Must be power of 2.
409 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
410 * will try and pick a reasonable max order.
411 * @flags: Extra flags necessary. See GPU_BALLOC_*.
412 */
413int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
414 struct vm_gk20a *vm, const char *name,
415 u64 base, u64 size, u64 blk_size,
416 u64 max_order, u64 flags)
417{
418 int err;
419 struct gk20a_buddy_allocator *a;
420
421 /* blk_size must be greater than 0 and a power of 2. */
422 if (blk_size == 0)
423 return -EINVAL;
424 if (blk_size & (blk_size - 1))
425 return -EINVAL;
426
427 if (max_order > GPU_BALLOC_MAX_ORDER)
428 return -EINVAL;
429
430 /* If this is to manage a GVA space we need a VM. */
431 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
432 return -EINVAL;
433
434 a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
435 if (!a)
436 return -ENOMEM;
437
438 err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
439 if (err)
440 goto fail;
441
442 a->base = base;
443 a->length = size;
444 a->blk_size = blk_size;
445 a->blk_shift = __ffs(blk_size);
446 a->owner = __a;
447
448 /*
449 * If base is 0 then modfy base to be the size of one block so that we
450 * can return errors by returning addr == 0.
451 */
452 if (a->base == 0) {
453 a->base = a->blk_size;
454 a->length -= a->blk_size;
455 }
456
457 a->vm = vm;
458 if (flags & GPU_BALLOC_GVA_SPACE)
459 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
460
461 a->flags = flags;
462 a->max_order = max_order;
463
464 balloc_allocator_align(a);
465 balloc_compute_max_order(a);
466
467 /* Shared buddy kmem_cache for all allocators. */
468 if (!buddy_cache)
469 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
470 if (!buddy_cache) {
471 err = -ENOMEM;
472 goto fail;
473 }
474
475 a->alloced_buddies = RB_ROOT;
476 a->fixed_allocs = RB_ROOT;
477 err = balloc_init_lists(a);
478 if (err)
479 goto fail;
480
481 a->inited = 1;
482
483 gk20a_init_alloc_debug(__a);
484 alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base);
485 alloc_dbg(__a, " size 0x%llx\n", a->length);
486 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
487 alloc_dbg(__a, " max_order %llu\n", a->max_order);
488 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
489
490 return 0;
491
492fail:
493 kfree(a);
494 return err;
495}
496
497int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name,
498 u64 base, u64 size, u64 blk_size, u64 flags)
499{
500 return __gk20a_buddy_allocator_init(a, NULL, name,
501 base, size, blk_size, 0, 0);
502}
503
504/*
505 * Clean up and destroy the passed allocator.
506 */
507static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
508{
509 int i;
510 struct rb_node *node;
511 struct gk20a_buddy *bud;
512 struct gk20a_fixed_alloc *falloc;
513 struct gk20a_buddy_allocator *a = __a->priv;
514
515 alloc_lock(__a);
516
517 if (!IS_ERR_OR_NULL(__a->debugfs_entry))
518 debugfs_remove(__a->debugfs_entry);
519
520 /*
521 * Free the fixed allocs first.
522 */
523 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
524 falloc = container_of(node,
525 struct gk20a_fixed_alloc, alloced_entry);
526
527 rb_erase(node, &a->fixed_allocs);
528 __balloc_do_free_fixed(a, falloc);
529 }
530
531 /*
532 * And now free all outstanding allocations.
533 */
534 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
535 bud = container_of(node, struct gk20a_buddy, alloced_entry);
536 balloc_free_buddy(a, bud->start);
537 balloc_blist_add(a, bud);
538 balloc_coalesce(a, bud);
539 }
540
541 /*
542 * Now clean up the unallocated buddies.
543 */
544 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
545 BUG_ON(a->buddy_list_alloced[i] != 0);
546
547 while (!list_empty(balloc_get_order_list(a, i))) {
548 bud = list_first_entry(balloc_get_order_list(a, i),
549 struct gk20a_buddy, buddy_entry);
550 balloc_blist_rem(a, bud);
551 kmem_cache_free(buddy_cache, bud);
552 }
553
554 if (a->buddy_list_len[i] != 0) {
555 pr_info("Excess buddies!!! (%d: %llu)\n",
556 i, a->buddy_list_len[i]);
557 BUG();
558 }
559 if (a->buddy_list_split[i] != 0) {
560 pr_info("Excess split nodes!!! (%d: %llu)\n",
561 i, a->buddy_list_split[i]);
562 BUG();
563 }
564 if (a->buddy_list_alloced[i] != 0) {
565 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
566 i, a->buddy_list_alloced[i]);
567 BUG();
568 }
569 }
570
571 kfree(a);
572
573 alloc_unlock(__a);
574}
575
576/*
577 * Combine the passed buddy if possible. The pointer in @b may not be valid
578 * after this as the buddy may be freed.
579 *
580 * @a must be locked.
581 */
582static void balloc_coalesce(struct gk20a_buddy_allocator *a,
583 struct gk20a_buddy *b)
584{
585 struct gk20a_buddy *parent;
586
587 if (buddy_is_alloced(b) || buddy_is_split(b))
588 return;
589
590 /*
591 * If both our buddy and I are both not allocated and not split then
592 * we can coalesce ourselves.
593 */
594 if (!b->buddy)
595 return;
596 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
597 return;
598
599 parent = b->parent;
600
601 balloc_blist_rem(a, b);
602 balloc_blist_rem(a, b->buddy);
603
604 buddy_clr_split(parent);
605 a->buddy_list_split[parent->order]--;
606 balloc_blist_add(a, parent);
607
608 /*
609 * Recursively coalesce as far as we can go.
610 */
611 balloc_coalesce(a, parent);
612
613 /* Clean up the remains. */
614 kmem_cache_free(buddy_cache, b->buddy);
615 kmem_cache_free(buddy_cache, b);
616}
617
618/*
619 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
620 *
621 * @a must be locked.
622 */
623static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
624 struct gk20a_buddy *b, int pte_size)
625{
626 struct gk20a_buddy *left, *right;
627 u64 half;
628
629 left = balloc_new_buddy(a, b, b->start, b->order - 1);
630 if (!left)
631 return -ENOMEM;
632
633 half = (b->end - b->start) / 2;
634
635 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
636 if (!right) {
637 kmem_cache_free(buddy_cache, left);
638 return -ENOMEM;
639 }
640
641 buddy_set_split(b);
642 a->buddy_list_split[b->order]++;
643
644 b->left = left;
645 b->right = right;
646 left->buddy = right;
647 right->buddy = left;
648 left->parent = b;
649 right->parent = b;
650
651 /* PTE considerations. */
652 if (a->flags & GPU_BALLOC_GVA_SPACE &&
653 left->order <= a->pte_blk_order) {
654 left->pte_size = pte_size;
655 right->pte_size = pte_size;
656 }
657
658 balloc_blist_rem(a, b);
659 balloc_blist_add(a, left);
660 balloc_blist_add(a, right);
661
662 return 0;
663}
664
665/*
666 * Place the passed buddy into the RB tree for allocated buddies. Never fails
667 * unless the passed entry is a duplicate which is a bug.
668 *
669 * @a must be locked.
670 */
671static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
672 struct gk20a_buddy *b)
673{
674 struct rb_node **new = &(a->alloced_buddies.rb_node);
675 struct rb_node *parent = NULL;
676
677 while (*new) {
678 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
679 alloced_entry);
680
681 parent = *new;
682 if (b->start < bud->start)
683 new = &((*new)->rb_left);
684 else if (b->start > bud->start)
685 new = &((*new)->rb_right);
686 else
687 BUG_ON("Duplicate entries in allocated list!\n");
688 }
689
690 rb_link_node(&b->alloced_entry, parent, new);
691 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
692
693 buddy_set_alloced(b);
694 a->buddy_list_alloced[b->order]++;
695}
696
697/*
698 * Remove the passed buddy from the allocated buddy RB tree. Returns the
699 * deallocated buddy for further processing.
700 *
701 * @a must be locked.
702 */
703static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
704 u64 addr)
705{
706 struct rb_node *node = a->alloced_buddies.rb_node;
707 struct gk20a_buddy *bud;
708
709 while (node) {
710 bud = container_of(node, struct gk20a_buddy, alloced_entry);
711
712 if (addr < bud->start)
713 node = node->rb_left;
714 else if (addr > bud->start)
715 node = node->rb_right;
716 else
717 break;
718 }
719
720 if (!node)
721 return NULL;
722
723 rb_erase(node, &a->alloced_buddies);
724 buddy_clr_alloced(bud);
725 a->buddy_list_alloced[bud->order]--;
726
727 return bud;
728}
729
730/*
731 * Find a suitable buddy for the given order and PTE type (big or little).
732 */
733static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
734 u64 order, int pte_size)
735{
736 struct gk20a_buddy *bud;
737
738 if (order > a->max_order ||
739 list_empty(balloc_get_order_list(a, order)))
740 return NULL;
741
742 if (a->flags & GPU_BALLOC_GVA_SPACE &&
743 pte_size == BALLOC_PTE_SIZE_BIG)
744 bud = list_last_entry(balloc_get_order_list(a, order),
745 struct gk20a_buddy, buddy_entry);
746 else
747 bud = list_first_entry(balloc_get_order_list(a, order),
748 struct gk20a_buddy, buddy_entry);
749
750 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
751 bud->pte_size != pte_size)
752 return NULL;
753
754 return bud;
755}
756
757/*
758 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
759 * order buddies until we have a suitable buddy to allocate.
760 *
761 * For PDE grouping add an extra check to see if a buddy is suitable: that the
762 * buddy exists in a PDE who's PTE size is reasonable
763 *
764 * @a must be locked.
765 */
766static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
767 u64 order, int pte_size)
768{
769 u64 split_order;
770 struct gk20a_buddy *bud = NULL;
771
772 split_order = order;
773 while (split_order <= a->max_order &&
774 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
775 split_order++;
776
777 /* Out of memory! */
778 if (!bud)
779 return 0;
780
781 while (bud->order != order) {
782 if (balloc_split_buddy(a, bud, pte_size))
783 return 0; /* No mem... */
784 bud = bud->left;
785 }
786
787 balloc_blist_rem(a, bud);
788 balloc_alloc_buddy(a, bud);
789
790 return bud->start;
791}
792
793/*
794 * Allocate memory from the passed allocator.
795 */
796static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len)
797{
798 u64 order, addr;
799 int pte_size;
800 struct gk20a_buddy_allocator *a = __a->priv;
801
802 gk20a_alloc_trace_func();
803
804 alloc_lock(__a);
805
806 order = balloc_get_order(a, len);
807
808 if (order > a->max_order) {
809 alloc_unlock(__a);
810 alloc_dbg(balloc_owner(a), "Alloc fail\n");
811 gk20a_alloc_trace_func_done();
812 return 0;
813 }
814
815 /*
816 * For now pass the base address of the allocator's region to
817 * __get_pte_size(). This ensures we get the right page size for
818 * the alloc but we don't have to know what the real address is
819 * going to be quite yet.
820 *
821 * TODO: once userspace supports a unified address space pass 0 for
822 * the base. This will make only 'len' affect the PTE size.
823 */
824 if (a->flags & GPU_BALLOC_GVA_SPACE)
825 pte_size = __get_pte_size(a->vm, a->base, len);
826 else
827 pte_size = BALLOC_PTE_SIZE_ANY;
828
829 addr = __balloc_do_alloc(a, order, pte_size);
830
831 if (addr) {
832 a->bytes_alloced += len;
833 a->bytes_alloced_real += balloc_order_to_len(a, order);
834 alloc_dbg(balloc_owner(a),
835 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
836 addr, order, len,
837 pte_size == gmmu_page_size_big ? "big" :
838 pte_size == gmmu_page_size_small ? "small" :
839 "NA/any");
840 } else {
841 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
842 }
843
844 alloc_unlock(__a);
845
846 gk20a_alloc_trace_func_done();
847 return addr;
848}
849
850/*
851 * See if the passed range is actually available for allocation. If so, then
852 * return 1, otherwise return 0.
853 *
854 * TODO: Right now this uses the unoptimal approach of going through all
855 * outstanding allocations and checking their base/ends. This could be better.
856 */
857static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
858 u64 base, u64 end)
859{
860 struct rb_node *node;
861 struct gk20a_buddy *bud;
862
863 node = rb_first(&a->alloced_buddies);
864 if (!node)
865 return 1; /* No allocs yet. */
866
867 bud = container_of(node, struct gk20a_buddy, alloced_entry);
868
869 while (bud->start < end) {
870 if ((bud->start > base && bud->start < end) ||
871 (bud->end > base && bud->end < end))
872 return 0;
873
874 node = rb_next(node);
875 if (!node)
876 break;
877 bud = container_of(node, struct gk20a_buddy, alloced_entry);
878 }
879
880 return 1;
881}
882
883static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
884 struct gk20a_fixed_alloc *f)
885{
886 struct rb_node **new = &(a->fixed_allocs.rb_node);
887 struct rb_node *parent = NULL;
888
889 while (*new) {
890 struct gk20a_fixed_alloc *falloc =
891 container_of(*new, struct gk20a_fixed_alloc,
892 alloced_entry);
893
894 parent = *new;
895 if (f->start < falloc->start)
896 new = &((*new)->rb_left);
897 else if (f->start > falloc->start)
898 new = &((*new)->rb_right);
899 else
900 BUG_ON("Duplicate entries in allocated list!\n");
901 }
902
903 rb_link_node(&f->alloced_entry, parent, new);
904 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
905}
906
/*
 * Remove the fixed allocation starting at @addr from the fixed-alloc RB tree.
 * Returns the removed gk20a_fixed_alloc for further processing, or NULL if
 * no fixed allocation starts at @addr.
 *
 * @a must be locked.
 */
static struct gk20a_fixed_alloc *balloc_free_fixed(
	struct gk20a_buddy_allocator *a, u64 addr)
{
	struct rb_node *node = a->fixed_allocs.rb_node;
	struct gk20a_fixed_alloc *falloc;

	/* Binary search keyed on the fixed alloc's start address. */
	while (node) {
		falloc = container_of(node,
			struct gk20a_fixed_alloc, alloced_entry);

		if (addr < falloc->start)
			node = node->rb_left;
		else if (addr > falloc->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->fixed_allocs);

	return falloc;
}
938
939/*
940 * Find the parent range - doesn't necessarily need the parent to actually exist
941 * as a buddy. Finding an existing parent comes later...
942 */
943static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
944 u64 base, u64 order,
945 u64 *pbase, u64 *porder)
946{
947 u64 base_mask;
948 u64 shifted_base = balloc_base_shift(a, base);
949
950 order++;
951 base_mask = ~((a->blk_size << order) - 1);
952
953 shifted_base &= base_mask;
954
955 *pbase = balloc_base_unshift(a, shifted_base);
956 *porder = order;
957}
958
/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 *
 * Returns the buddy covering exactly [base, base + 2^order blocks), or NULL
 * if no enclosing buddy could be found or a split ran out of memory.
 *
 * @a must be locked (callers hold the allocator lock).
 */
static struct gk20a_buddy *__balloc_make_fixed_buddy(
	struct gk20a_buddy_allocator *a, u64 base, u64 order)
{
	struct gk20a_buddy *bud = NULL;
	struct list_head *order_list;
	u64 cur_order = order, cur_base = base;

	/*
	 * Algo:
	 * 1. Keep jumping up a buddy order until we find the real buddy that
	 *    this buddy exists in.
	 * 2. Then work our way down through the buddy tree until we hit a dead
	 *    end.
	 * 3. Start splitting buddies until we split to the one we need to
	 *    make.
	 */
	while (cur_order <= a->max_order) {
		int found = 0;

		/* Scan the free list of this order for a buddy at cur_base. */
		order_list = balloc_get_order_list(a, cur_order);
		list_for_each_entry(bud, order_list, buddy_entry) {
			if (bud->start == cur_base) {
				found = 1;
				break;
			}
		}

		if (found)
			break;

		/* Not free at this order; try the enclosing parent range. */
		__balloc_get_parent_range(a, cur_base, cur_order,
					  &cur_base, &cur_order);
	}

	if (cur_order > a->max_order) {
		alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
		return NULL;
	}

	/* Split this buddy as necessary until we get the target buddy. */
	while (bud->start != base || bud->order != order) {
		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
			/* Split failed (ENOMEM); re-merge what we split. */
			balloc_coalesce(a, bud);
			return NULL;
		}

		/* Descend into whichever half contains @base. */
		if (base < bud->right->start)
			bud = bud->left;
		else
			bud = bud->right;

	}

	return bud;
}
1018
/*
 * Carve out the fixed range [base, base + len) as a list of buddies attached
 * to @falloc. Returns @base on success, 0 on failure (with any partially
 * built buddy list torn down).
 *
 * The alignment order is chosen from the lowest set bit of the current base
 * (its natural alignment) capped by the remaining length, so each iteration
 * claims the largest buddy that fits at the current position.
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
				   struct gk20a_fixed_alloc *falloc,
				   u64 base, u64 len)
{
	u64 shifted_base, inc_base;
	u64 align_order;

	shifted_base = balloc_base_shift(a, base);
	if (shifted_base == 0)
		/* Base has no set bits; alignment limited only by length. */
		align_order = __fls(len >> a->blk_shift);
	else
		align_order = min_t(u64,
				    __ffs(shifted_base >> a->blk_shift),
				    __fls(len >> a->blk_shift));

	if (align_order > a->max_order) {
		alloc_dbg(balloc_owner(a),
			  "Align order too big: %llu > %llu\n",
			  align_order, a->max_order);
		return 0;
	}

	/*
	 * Generate a list of buddies that satisfy this allocation.
	 */
	inc_base = shifted_base;
	while (inc_base < (shifted_base + len)) {
		u64 order_len = balloc_order_to_len(a, align_order);
		u64 remaining;
		struct gk20a_buddy *bud;

		bud = __balloc_make_fixed_buddy(a,
					balloc_base_unshift(a, inc_base),
					align_order);
		if (!bud) {
			alloc_dbg(balloc_owner(a),
				  "Fixed buddy failed: {0x%llx, %llu}!\n",
				  balloc_base_unshift(a, inc_base),
				  align_order);
			goto err_and_cleanup;
		}

		/* Move the buddy from the free list to @falloc's list. */
		balloc_blist_rem(a, bud);
		balloc_alloc_buddy(a, bud);
		__balloc_buddy_list_add(a, bud, &falloc->buddies);

		/* Book keeping. */
		inc_base += order_len;
		remaining = (shifted_base + len) - inc_base;
		align_order = __ffs(inc_base >> a->blk_shift);

		/* If we don't have much left - trim down align_order. */
		if (balloc_order_to_len(a, align_order) > remaining)
			align_order = __balloc_max_order_in(a, inc_base,
							inc_base + remaining);
	}

	return base;

err_and_cleanup:
	/* Undo every buddy claimed so far; they go back via coalescing later. */
	while (!list_empty(&falloc->buddies)) {
		struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
						struct gk20a_buddy,
						buddy_entry);

		__balloc_buddy_list_rem(a, bud);
		balloc_free_buddy(a, bud->start);
		kmem_cache_free(buddy_cache, bud);
	}

	return 0;
}
1091
/*
 * Allocate a fixed address allocation. The address of the allocation is @base
 * and the length is @len. This is not a typical buddy allocator operation and
 * as such has a high possibility of failure if the address space is heavily in
 * use.
 *
 * Returns @base on success, 0 on failure.
 *
 * Please do not use this function unless _absolutely_ necessary.
 */
static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
				   u64 base, u64 len)
{
	u64 ret, real_bytes = 0;
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc = NULL;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	/* If base isn't aligned to an order 0 block, fail. */
	if (base & (a->blk_size - 1))
		goto fail;

	if (len == 0)
		goto fail;

	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
	if (!falloc)
		goto fail;

	INIT_LIST_HEAD(&falloc->buddies);
	falloc->start = base;
	falloc->end = base + len;

	alloc_lock(__a);
	if (!balloc_is_range_free(a, base, base + len)) {
		alloc_dbg(balloc_owner(a),
			  "Range not free: 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
	if (!ret) {
		alloc_dbg(balloc_owner(a),
			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	balloc_alloc_fixed(a, falloc);

	/* Real bytes may exceed @len due to buddy-size rounding. */
	list_for_each_entry(bud, &falloc->buddies, buddy_entry)
		real_bytes += (bud->end - bud->start);

	a->bytes_alloced += len;
	a->bytes_alloced_real += real_bytes;

	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

	gk20a_alloc_trace_func_done();
	return base;

fail_unlock:
	alloc_unlock(__a);
fail:
	/* kfree(NULL) is a no-op, so this covers the pre-alloc failures too. */
	kfree(falloc);
	gk20a_alloc_trace_func_done();
	return 0;
}
1162
/*
 * Release every buddy belonging to the fixed allocation @falloc back to the
 * free lists and free @falloc itself.
 *
 * @a must be locked.
 */
static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
				   struct gk20a_fixed_alloc *falloc)
{
	struct gk20a_buddy *bud;

	while (!list_empty(&falloc->buddies)) {
		bud = list_first_entry(&falloc->buddies,
				       struct gk20a_buddy,
				       buddy_entry);
		__balloc_buddy_list_rem(a, bud);

		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		a->bytes_freed += balloc_order_to_len(a, bud->order);

		/*
		 * Attempt to defrag the allocation.
		 */
		balloc_coalesce(a, bud);
	}

	kfree(falloc);
}
1186
/*
 * Free the passed allocation.
 *
 * Freeing a NULL/zero address is a no-op. The address is first looked up in
 * the fixed-alloc tree, then in the regular buddy tree; an address matching
 * neither is silently ignored.
 */
static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr)
{
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	if (!addr) {
		gk20a_alloc_trace_func_done();
		return;
	}

	alloc_lock(__a);

	/*
	 * First see if this is a fixed alloc. If not fall back to a regular
	 * buddy.
	 */
	falloc = balloc_free_fixed(a, addr);
	if (falloc) {
		__balloc_do_free_fixed(a, falloc);
		goto done;
	}

	bud = balloc_free_buddy(a, addr);
	if (!bud)
		goto done;

	balloc_blist_add(a, bud);
	a->bytes_freed += balloc_order_to_len(a, bud->order);

	/*
	 * Attempt to defrag the allocation.
	 */
	balloc_coalesce(a, bud);

done:
	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
	gk20a_alloc_trace_func_done();
	return;
}
1233
1234/*
1235 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1236 * stats are printed to the kernel log. This lets this code be used for
1237 * debugging purposes internal to the allocator.
1238 */
1239static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
1240 struct seq_file *s, int lock)
1241{
1242 int i;
1243 struct rb_node *node;
1244 struct gk20a_fixed_alloc *falloc;
1245 struct gk20a_buddy_allocator *a = __a->priv;
1246
1247 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1248 a->base, a->length, a->blk_size);
1249 __alloc_pstat(s, __a, "Internal params:\n");
1250 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1251 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1252 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1253 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1254 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1255
1256 __alloc_pstat(s, __a, "Buddy blocks:\n");
1257 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1258 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1259
1260 if (lock)
1261 alloc_lock(__a);
1262 for (i = a->max_order; i >= 0; i--) {
1263 if (a->buddy_list_len[i] == 0 &&
1264 a->buddy_list_alloced[i] == 0 &&
1265 a->buddy_list_split[i] == 0)
1266 continue;
1267
1268 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1269 a->buddy_list_len[i],
1270 a->buddy_list_alloced[i],
1271 a->buddy_list_split[i]);
1272 }
1273
1274 __alloc_pstat(s, __a, "\n");
1275
1276 for (node = rb_first(&a->fixed_allocs), i = 1;
1277 node != NULL;
1278 node = rb_next(node)) {
1279 falloc = container_of(node,
1280 struct gk20a_fixed_alloc, alloced_entry);
1281
1282 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1283 i, falloc->start, falloc->end);
1284 }
1285
1286 __alloc_pstat(s, __a, "\n");
1287 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1288 a->bytes_alloced);
1289 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1290 a->bytes_alloced_real);
1291 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1292 a->bytes_freed);
1293
1294 if (lock)
1295 alloc_unlock(__a);
1296}
1297
1298void gk20a_alloc_print_stats(struct gk20a_allocator *__a, 105void gk20a_alloc_print_stats(struct gk20a_allocator *__a,
1299 struct seq_file *s, int lock) 106 struct seq_file *s, int lock)
1300{ 107{
@@ -1322,7 +129,7 @@ static const struct file_operations __alloc_fops = {
1322 .release = single_release, 129 .release = single_release,
1323}; 130};
1324 131
1325static void gk20a_init_alloc_debug(struct gk20a_allocator *a) 132void gk20a_init_alloc_debug(struct gk20a_allocator *a)
1326{ 133{
1327 if (!gk20a_alloc_debugfs_root) 134 if (!gk20a_alloc_debugfs_root)
1328 return; 135 return;
@@ -1332,7 +139,15 @@ static void gk20a_init_alloc_debug(struct gk20a_allocator *a)
1332 a, &__alloc_fops); 139 a, &__alloc_fops);
1333} 140}
1334 141
1335#ifdef CONFIG_DEBUG_FS 142void gk20a_fini_alloc_debug(struct gk20a_allocator *a)
143{
144 if (!gk20a_alloc_debugfs_root)
145 return;
146
147 if (!IS_ERR_OR_NULL(a->debugfs_entry))
148 debugfs_remove(a->debugfs_entry);
149}
150
1336void gk20a_alloc_debugfs_init(struct platform_device *pdev) 151void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1337{ 152{
1338 struct gk20a_platform *platform = platform_get_drvdata(pdev); 153 struct gk20a_platform *platform = platform_get_drvdata(pdev);
@@ -1345,4 +160,3 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1345 debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, 160 debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root,
1346 &gk20a_alloc_tracing_on); 161 &gk20a_alloc_tracing_on);
1347} 162}
1348#endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 74e23e6c..06819700 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -17,8 +17,6 @@
17#ifndef GK20A_ALLOCATOR_H 17#ifndef GK20A_ALLOCATOR_H
18#define GK20A_ALLOCATOR_H 18#define GK20A_ALLOCATOR_H
19 19
20#include <linux/list.h>
21#include <linux/rbtree.h>
22#include <linux/debugfs.h> 20#include <linux/debugfs.h>
23#include <linux/seq_file.h> 21#include <linux/seq_file.h>
24#include <linux/platform_device.h> 22#include <linux/platform_device.h>
@@ -63,138 +61,6 @@ struct gk20a_allocator_ops {
63 struct seq_file *s, int lock); 61 struct seq_file *s, int lock);
64}; 62};
65 63
66/*
67 * Each buddy is an element in a binary tree.
68 */
69struct gk20a_buddy {
70 struct gk20a_buddy *parent; /* Parent node. */
71 struct gk20a_buddy *buddy; /* This node's buddy. */
72 struct gk20a_buddy *left; /* Lower address sub-node. */
73 struct gk20a_buddy *right; /* Higher address sub-node. */
74
75 struct list_head buddy_entry; /* List entry for various lists. */
76 struct rb_node alloced_entry; /* RB tree of allocations. */
77
78 u64 start; /* Start address of this buddy. */
79 u64 end; /* End address of this buddy. */
80 u64 order; /* Buddy order. */
81
82#define BALLOC_BUDDY_ALLOCED 0x1
83#define BALLOC_BUDDY_SPLIT 0x2
84#define BALLOC_BUDDY_IN_LIST 0x4
85 int flags; /* List of associated flags. */
86
87 /*
88 * Size of the PDE this buddy is using. This allows for grouping like
89 * sized allocations into the same PDE.
90 */
91#define BALLOC_PTE_SIZE_ANY 0x0
92#define BALLOC_PTE_SIZE_SMALL 0x1
93#define BALLOC_PTE_SIZE_BIG 0x2
94 int pte_size;
95};
96
97#define __buddy_flag_ops(flag, flag_up) \
98 static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
99 { \
100 return b->flags & BALLOC_BUDDY_ ## flag_up; \
101 } \
102 static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
103 { \
104 b->flags |= BALLOC_BUDDY_ ## flag_up; \
105 } \
106 static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
107 { \
108 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
109 }
110
111/*
112 * int buddy_is_alloced(struct gk20a_buddy *b);
113 * void buddy_set_alloced(struct gk20a_buddy *b);
114 * void buddy_clr_alloced(struct gk20a_buddy *b);
115 *
116 * int buddy_is_split(struct gk20a_buddy *b);
117 * void buddy_set_split(struct gk20a_buddy *b);
118 * void buddy_clr_split(struct gk20a_buddy *b);
119 *
120 * int buddy_is_in_list(struct gk20a_buddy *b);
121 * void buddy_set_in_list(struct gk20a_buddy *b);
122 * void buddy_clr_in_list(struct gk20a_buddy *b);
123 */
124__buddy_flag_ops(alloced, ALLOCED);
125__buddy_flag_ops(split, SPLIT);
126__buddy_flag_ops(in_list, IN_LIST);
127
128/*
129 * Keeps info for a fixed allocation.
130 */
131struct gk20a_fixed_alloc {
132 struct list_head buddies; /* List of buddies. */
133 struct rb_node alloced_entry; /* RB tree of fixed allocations. */
134
135 u64 start; /* Start of fixed block. */
136 u64 end; /* End address. */
137};
138
139/*
140 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
141 * doesn't have to correspond to a byte. In some cases each unit is a more
142 * complex object such as a comp_tag line or the like.
143 *
144 * The max order is computed based on the size of the minimum order and the size
145 * of the address space.
146 *
147 * order_size is the size of an order 0 buddy.
148 */
149struct gk20a_buddy_allocator {
150 struct gk20a_allocator *owner; /* Owner of this buddy allocator. */
151 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
152
153 u64 base; /* Base address of the space. */
154 u64 length; /* Length of the space. */
155 u64 blk_size; /* Size of order 0 allocation. */
156 u64 blk_shift; /* Shift to divide by blk_size. */
157
158 int init; /* Non-zero if initialized. */
159
160 /* Internal stuff. */
161 u64 start; /* Real start (aligned to blk_size). */
162 u64 end; /* Real end, trimmed if needed. */
163 u64 count; /* Count of objects in space. */
164 u64 blks; /* Count of blks in the space. */
165 u64 max_order; /* Specific maximum order. */
166
167 struct rb_root alloced_buddies; /* Outstanding allocations. */
168 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
169
170 /*
171 * Impose an upper bound on the maximum order.
172 */
173#define GPU_BALLOC_MAX_ORDER 31
174#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
175
176 struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
177 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
178 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
179 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
180
181 /*
182 * This is for when the allocator is managing a GVA space (the
183 * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires
184 * that we group like sized allocations into PDE blocks.
185 */
186 u64 pte_blk_order;
187
188 int inited;
189
190#define GPU_BALLOC_GVA_SPACE 0x1
191 u64 flags;
192
193 u64 bytes_alloced;
194 u64 bytes_alloced_real;
195 u64 bytes_freed;
196};
197
198struct gk20a_allocator { 64struct gk20a_allocator {
199 char name[32]; 65 char name[32];
200 struct mutex lock; 66 struct mutex lock;
@@ -205,6 +71,11 @@ struct gk20a_allocator {
205 struct dentry *debugfs_entry; 71 struct dentry *debugfs_entry;
206}; 72};
207 73
74/*
75 * Allocator flags.
76 */
77#define GPU_BALLOC_GVA_SPACE 0x1
78
208static inline void alloc_lock(struct gk20a_allocator *a) 79static inline void alloc_lock(struct gk20a_allocator *a)
209{ 80{
210 mutex_lock(&a->lock); 81 mutex_lock(&a->lock);
@@ -215,42 +86,6 @@ static inline void alloc_unlock(struct gk20a_allocator *a)
215 mutex_unlock(&a->lock); 86 mutex_unlock(&a->lock);
216} 87}
217 88
218static inline struct gk20a_buddy_allocator *buddy_allocator(
219 struct gk20a_allocator *a)
220{
221 return (struct gk20a_buddy_allocator *)a->priv;
222}
223
224static inline struct list_head *balloc_get_order_list(
225 struct gk20a_buddy_allocator *a, int order)
226{
227 return &a->buddy_list[order];
228}
229
230static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a,
231 int order)
232{
233 return (1 << order) * a->blk_size;
234}
235
236static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a,
237 u64 base)
238{
239 return base - a->start;
240}
241
242static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a,
243 u64 base)
244{
245 return base + a->start;
246}
247
248static inline struct gk20a_allocator *balloc_owner(
249 struct gk20a_buddy_allocator *a)
250{
251 return a->owner;
252}
253
254/* 89/*
255 * Buddy allocator specific initializers. 90 * Buddy allocator specific initializers.
256 */ 91 */
@@ -262,6 +97,8 @@ int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator,
262 const char *name, u64 base, u64 size, 97 const char *name, u64 base, u64 size,
263 u64 blk_size, u64 flags); 98 u64 blk_size, u64 flags);
264 99
100#define GPU_BALLOC_MAX_ORDER 31
101
265/* 102/*
266 * Allocator APIs. 103 * Allocator APIs.
267 */ 104 */
@@ -282,10 +119,33 @@ void gk20a_alloc_print_stats(struct gk20a_allocator *a,
282 struct seq_file *s, int lock); 119 struct seq_file *s, int lock);
283 120
284/* 121/*
122 * Common functionality for the internals of the allocators.
123 */
124void gk20a_init_alloc_debug(struct gk20a_allocator *a);
125void gk20a_fini_alloc_debug(struct gk20a_allocator *a);
126int __gk20a_alloc_common_init(struct gk20a_allocator *a,
127 const char *name, void *priv,
128 const struct gk20a_allocator_ops *ops);
129
130/*
285 * Debug stuff. 131 * Debug stuff.
286 */ 132 */
133extern u32 gk20a_alloc_tracing_on;
134
287void gk20a_alloc_debugfs_init(struct platform_device *pdev); 135void gk20a_alloc_debugfs_init(struct platform_device *pdev);
288 136
137#define gk20a_alloc_trace_func() \
138 do { \
139 if (gk20a_alloc_tracing_on) \
140 trace_printk("%s\n", __func__); \
141 } while (0)
142
143#define gk20a_alloc_trace_func_done() \
144 do { \
145 if (gk20a_alloc_tracing_on) \
146 trace_printk("%s_done\n", __func__); \
147 } while (0)
148
289#define __alloc_pstat(seq, allocator, fmt, arg...) \ 149#define __alloc_pstat(seq, allocator, fmt, arg...) \
290 do { \ 150 do { \
291 if (s) \ 151 if (s) \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
new file mode 100644
index 00000000..4a1df339
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c
@@ -0,0 +1,1187 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19
20#include "mm_gk20a.h"
21#include "platform_gk20a.h"
22#include "gk20a_allocator.h"
23#include "buddy_allocator_priv.h"
24
25static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
26
27/* Some other buddy allocator functions. */
28static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
29 u64 addr);
30static void balloc_coalesce(struct gk20a_buddy_allocator *a,
31 struct gk20a_buddy *b);
32static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
33 struct gk20a_fixed_alloc *falloc);
34
35/*
36 * This function is not present in older kernel's list.h code.
37 */
38#ifndef list_last_entry
39#define list_last_entry(ptr, type, member) \
40 list_entry((ptr)->prev, type, member)
41#endif
42
43/*
44 * GPU buddy allocator for various address spaces.
45 *
46 * Current limitations:
47 * o A fixed allocation could potentially be made that borders PDEs with
48 * different PTE sizes. This would require that fixed buffer to have
49 * different sized PTEs for different parts of the allocation. Probably
50 * best to just require PDE alignment for fixed address allocs.
51 *
52 * o It is currently possible to make an allocator that has a buddy alignment
53 * out of sync with the PDE block size alignment. A simple example is a
54 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
 55 * which means each buddy corresponds to more than one actual GPU page. The
56 * best way to fix this is probably just require PDE blocksize alignment
57 * for the start of the address space. At the moment all allocators are
58 * easily PDE aligned so this hasn't been a problem.
59 */
60
61/*
62 * Pick a suitable maximum order for this allocator.
63 *
64 * Hueristic: Just guessing that the best max order is the largest single
65 * block that will fit in the address space.
66 */
67static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
68{
69 u64 true_max_order = ilog2(a->blks);
70
71 if (a->max_order == 0) {
72 a->max_order = true_max_order;
73 return;
74 }
75
76 if (a->max_order > true_max_order)
77 a->max_order = true_max_order;
78 if (a->max_order > GPU_BALLOC_MAX_ORDER)
79 a->max_order = GPU_BALLOC_MAX_ORDER;
80}
81
82/*
83 * Since we can only allocate in chucks of a->blk_size we need to trim off
84 * any excess data that is not aligned to a->blk_size.
85 */
86static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
87{
88 a->start = ALIGN(a->base, a->blk_size);
89 WARN_ON(a->start != a->base);
90 a->end = (a->base + a->length) & ~(a->blk_size - 1);
91 a->count = a->end - a->start;
92 a->blks = a->count >> a->blk_shift;
93}
94
95/*
96 * Pass NULL for parent if you want a top level buddy.
97 */
98static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
99 struct gk20a_buddy *parent,
100 u64 start, u64 order)
101{
102 struct gk20a_buddy *new_buddy;
103
104 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
105 if (!new_buddy)
106 return NULL;
107
108 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
109
110 new_buddy->parent = parent;
111 new_buddy->start = start;
112 new_buddy->order = order;
113 new_buddy->end = start + (1 << order) * a->blk_size;
114
115 return new_buddy;
116}
117
/*
 * Low level list add: put @b on @list and mark it as listed. Adding a buddy
 * that is already on a list is a bug (BUG()s).
 */
static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
				    struct gk20a_buddy *b,
				    struct list_head *list)
{
	if (buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: adding added buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	/*
	 * Add big PTE blocks to the tail, small to the head for GVA spaces.
	 * This lets the code that checks if there are available blocks check
	 * without cycling through the entire list.
	 */
	if (a->flags & GPU_BALLOC_GVA_SPACE &&
	    b->pte_size == BALLOC_PTE_SIZE_BIG)
		list_add_tail(&b->buddy_entry, list);
	else
		list_add(&b->buddy_entry, list);

	buddy_set_in_list(b);
}
142
/*
 * Low level list removal: take @b off whatever list it is on and clear its
 * listed flag. Removing an unlisted buddy is a bug (BUG()s).
 */
static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
				    struct gk20a_buddy *b)
{
	if (!buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: removing removed buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	list_del_init(&b->buddy_entry);
	buddy_clr_in_list(b);
}
156
/*
 * Add a buddy to one of the buddy lists and deal with the necessary
 * book keeping. Adds the buddy to the list specified by the buddy's order
 * and bumps that order's free count.
 */
static void balloc_blist_add(struct gk20a_buddy_allocator *a,
			     struct gk20a_buddy *b)
{
	__balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
	a->buddy_list_len[b->order]++;
}
167
/*
 * Counterpart of balloc_blist_add(): take @b off its order's free list and
 * decrement that order's free count.
 */
static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
			     struct gk20a_buddy *b)
{
	__balloc_buddy_list_rem(a, b);
	a->buddy_list_len[b->order]--;
}
174
175static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
176{
177 if (len == 0)
178 return 0;
179
180 len--;
181 len >>= a->blk_shift;
182
183 return fls(len);
184}
185
186static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
187 u64 start, u64 end)
188{
189 u64 size = (end - start) >> a->blk_shift;
190
191 if (size > 0)
192 return min_t(u64, ilog2(size), a->max_order);
193 else
194 return GPU_BALLOC_MAX_ORDER;
195}
196
197/*
198 * Initialize the buddy lists.
199 */
200static int balloc_init_lists(struct gk20a_buddy_allocator *a)
201{
202 int i;
203 u64 bstart, bend, order;
204 struct gk20a_buddy *buddy;
205
206 bstart = a->start;
207 bend = a->end;
208
209 /* First make sure the LLs are valid. */
210 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
211 INIT_LIST_HEAD(balloc_get_order_list(a, i));
212
213 while (bstart < bend) {
214 order = __balloc_max_order_in(a, bstart, bend);
215
216 buddy = balloc_new_buddy(a, NULL, bstart, order);
217 if (!buddy)
218 goto cleanup;
219
220 balloc_blist_add(a, buddy);
221 bstart += balloc_order_to_len(a, order);
222 }
223
224 return 0;
225
226cleanup:
227 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
228 if (!list_empty(balloc_get_order_list(a, i))) {
229 buddy = list_first_entry(balloc_get_order_list(a, i),
230 struct gk20a_buddy, buddy_entry);
231 balloc_blist_rem(a, buddy);
232 kmem_cache_free(buddy_cache, buddy);
233 }
234 }
235
236 return -ENOMEM;
237}
238
/*
 * Clean up and destroy the passed allocator.
 *
 * Tears down, in order: outstanding fixed allocations, outstanding regular
 * allocations (coalescing as it goes), then the remaining free buddies.
 * Leftover entries in any bookkeeping counter indicate corruption and BUG().
 */
static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
{
	int i;
	struct rb_node *node;
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	alloc_lock(__a);

	gk20a_fini_alloc_debug(__a);

	/*
	 * Free the fixed allocs first.
	 */
	while ((node = rb_first(&a->fixed_allocs)) != NULL) {
		falloc = container_of(node,
				      struct gk20a_fixed_alloc, alloced_entry);

		rb_erase(node, &a->fixed_allocs);
		__balloc_do_free_fixed(a, falloc);
	}

	/*
	 * And now free all outstanding allocations.
	 */
	while ((node = rb_first(&a->alloced_buddies)) != NULL) {
		bud = container_of(node, struct gk20a_buddy, alloced_entry);
		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		balloc_coalesce(a, bud);
	}

	/*
	 * Now clean up the unallocated buddies.
	 */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		BUG_ON(a->buddy_list_alloced[i] != 0);

		while (!list_empty(balloc_get_order_list(a, i))) {
			bud = list_first_entry(balloc_get_order_list(a, i),
					struct gk20a_buddy, buddy_entry);
			balloc_blist_rem(a, bud);
			kmem_cache_free(buddy_cache, bud);
		}

		/* After the drain every per-order counter must be zero. */
		if (a->buddy_list_len[i] != 0) {
			pr_info("Excess buddies!!! (%d: %llu)\n",
				i, a->buddy_list_len[i]);
			BUG();
		}
		if (a->buddy_list_split[i] != 0) {
			pr_info("Excess split nodes!!! (%d: %llu)\n",
				i, a->buddy_list_split[i]);
			BUG();
		}
		if (a->buddy_list_alloced[i] != 0) {
			pr_info("Excess alloced nodes!!! (%d: %llu)\n",
				i, a->buddy_list_alloced[i]);
			BUG();
		}
	}

	kfree(a);

	alloc_unlock(__a);
}
309
/*
 * Combine the passed buddy if possible. The pointer in @b may not be valid
 * after this as the buddy may be freed.
 *
 * @a must be locked.
 */
static void balloc_coalesce(struct gk20a_buddy_allocator *a,
			    struct gk20a_buddy *b)
{
	struct gk20a_buddy *parent;

	/* Can't merge a buddy that is in use or has been split. */
	if (buddy_is_alloced(b) || buddy_is_split(b))
		return;

	/*
	 * If both our buddy and I are both not allocated and not split then
	 * we can coalesce ourselves.
	 */
	if (!b->buddy)
		return; /* Top-level buddy - nothing to merge with. */
	if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
		return;

	parent = b->parent;

	balloc_blist_rem(a, b);
	balloc_blist_rem(a, b->buddy);

	buddy_clr_split(parent);
	a->buddy_list_split[parent->order]--;
	balloc_blist_add(a, parent);

	/*
	 * Recursively coalesce as far as we can go.
	 */
	balloc_coalesce(a, parent);

	/* Clean up the remains. */
	kmem_cache_free(buddy_cache, b->buddy);
	kmem_cache_free(buddy_cache, b);
}
351
/*
 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
 * Returns 0 on success, -ENOMEM if a child buddy couldn't be allocated.
 *
 * @a must be locked.
 */
static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
			      struct gk20a_buddy *b, int pte_size)
{
	struct gk20a_buddy *left, *right;
	u64 half;

	left = balloc_new_buddy(a, b, b->start, b->order - 1);
	if (!left)
		return -ENOMEM;

	half = (b->end - b->start) / 2;

	right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
	if (!right) {
		kmem_cache_free(buddy_cache, left);
		return -ENOMEM;
	}

	buddy_set_split(b);
	a->buddy_list_split[b->order]++;

	/* Wire up the parent/child/sibling links. */
	b->left = left;
	b->right = right;
	left->buddy = right;
	right->buddy = left;
	left->parent = b;
	right->parent = b;

	/* PTE considerations. */
	if (a->flags & GPU_BALLOC_GVA_SPACE &&
	    left->order <= a->pte_blk_order) {
		/* Children inside a PDE block inherit the requested PTE size. */
		left->pte_size = pte_size;
		right->pte_size = pte_size;
	}

	/* The parent leaves the free lists; its halves replace it. */
	balloc_blist_rem(a, b);
	balloc_blist_add(a, left);
	balloc_blist_add(a, right);

	return 0;
}
398
/*
 * Place the passed buddy into the RB tree for allocated buddies. Never fails
 * unless the passed entry is a duplicate which is a bug.
 *
 * @a must be locked.
 */
static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
			       struct gk20a_buddy *b)
{
	struct rb_node **new = &(a->alloced_buddies.rb_node);
	struct rb_node *parent = NULL;

	/* Standard rbtree descent keyed on the buddy's start address. */
	while (*new) {
		struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
						       alloced_entry);

		parent = *new;
		if (b->start < bud->start)
			new = &((*new)->rb_left);
		else if (b->start > bud->start)
			new = &((*new)->rb_right);
		else
			BUG_ON("Duplicate entries in allocated list!\n");
	}

	rb_link_node(&b->alloced_entry, parent, new);
	rb_insert_color(&b->alloced_entry, &a->alloced_buddies);

	buddy_set_alloced(b);
	a->buddy_list_alloced[b->order]++;
}
430
/*
 * Remove the passed buddy from the allocated buddy RB tree. Returns the
 * deallocated buddy for further processing, or NULL if no allocated buddy
 * starts at @addr.
 *
 * @a must be locked.
 */
static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
					     u64 addr)
{
	struct rb_node *node = a->alloced_buddies.rb_node;
	struct gk20a_buddy *bud;

	/* Binary search keyed on the buddy's start address. */
	while (node) {
		bud = container_of(node, struct gk20a_buddy, alloced_entry);

		if (addr < bud->start)
			node = node->rb_left;
		else if (addr > bud->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->alloced_buddies);
	buddy_clr_alloced(bud);
	a->buddy_list_alloced[bud->order]--;

	return bud;
}
463
464/*
465 * Find a suitable buddy for the given order and PTE type (big or little).
466 */
467static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
468 u64 order, int pte_size)
469{
470 struct gk20a_buddy *bud;
471
472 if (order > a->max_order ||
473 list_empty(balloc_get_order_list(a, order)))
474 return NULL;
475
476 if (a->flags & GPU_BALLOC_GVA_SPACE &&
477 pte_size == BALLOC_PTE_SIZE_BIG)
478 bud = list_last_entry(balloc_get_order_list(a, order),
479 struct gk20a_buddy, buddy_entry);
480 else
481 bud = list_first_entry(balloc_get_order_list(a, order),
482 struct gk20a_buddy, buddy_entry);
483
484 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
485 bud->pte_size != pte_size)
486 return NULL;
487
488 return bud;
489}
490
491/*
492 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
493 * order buddies until we have a suitable buddy to allocate.
494 *
495 * For PDE grouping add an extra check to see if a buddy is suitable: that the
496 * buddy exists in a PDE who's PTE size is reasonable
497 *
498 * @a must be locked.
499 */
500static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
501 u64 order, int pte_size)
502{
503 u64 split_order;
504 struct gk20a_buddy *bud = NULL;
505
506 split_order = order;
507 while (split_order <= a->max_order &&
508 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
509 split_order++;
510
511 /* Out of memory! */
512 if (!bud)
513 return 0;
514
515 while (bud->order != order) {
516 if (balloc_split_buddy(a, bud, pte_size))
517 return 0; /* No mem... */
518 bud = bud->left;
519 }
520
521 balloc_blist_rem(a, bud);
522 balloc_alloc_buddy(a, bud);
523
524 return bud->start;
525}
526
527/*
528 * See if the passed range is actually available for allocation. If so, then
529 * return 1, otherwise return 0.
530 *
531 * TODO: Right now this uses the unoptimal approach of going through all
532 * outstanding allocations and checking their base/ends. This could be better.
533 */
534static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
535 u64 base, u64 end)
536{
537 struct rb_node *node;
538 struct gk20a_buddy *bud;
539
540 node = rb_first(&a->alloced_buddies);
541 if (!node)
542 return 1; /* No allocs yet. */
543
544 bud = container_of(node, struct gk20a_buddy, alloced_entry);
545
546 while (bud->start < end) {
547 if ((bud->start > base && bud->start < end) ||
548 (bud->end > base && bud->end < end))
549 return 0;
550
551 node = rb_next(node);
552 if (!node)
553 break;
554 bud = container_of(node, struct gk20a_buddy, alloced_entry);
555 }
556
557 return 1;
558}
559
560static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
561 struct gk20a_fixed_alloc *f)
562{
563 struct rb_node **new = &(a->fixed_allocs.rb_node);
564 struct rb_node *parent = NULL;
565
566 while (*new) {
567 struct gk20a_fixed_alloc *falloc =
568 container_of(*new, struct gk20a_fixed_alloc,
569 alloced_entry);
570
571 BUG_ON(!virt_addr_valid(falloc));
572
573 parent = *new;
574 if (f->start < falloc->start)
575 new = &((*new)->rb_left);
576 else if (f->start > falloc->start)
577 new = &((*new)->rb_right);
578 else
579 BUG_ON("Duplicate entries in allocated list!\n");
580 }
581
582 rb_link_node(&f->alloced_entry, parent, new);
583 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
584}
585
586/*
587 * Remove the passed buddy from the allocated buddy RB tree. Returns the
588 * deallocated buddy for further processing.
589 *
590 * @a must be locked.
591 */
592static struct gk20a_fixed_alloc *balloc_free_fixed(
593 struct gk20a_buddy_allocator *a, u64 addr)
594{
595 struct rb_node *node = a->fixed_allocs.rb_node;
596 struct gk20a_fixed_alloc *falloc;
597
598 while (node) {
599 falloc = container_of(node,
600 struct gk20a_fixed_alloc, alloced_entry);
601
602 if (addr < falloc->start)
603 node = node->rb_left;
604 else if (addr > falloc->start)
605 node = node->rb_right;
606 else
607 break;
608 }
609
610 if (!node)
611 return NULL;
612
613 rb_erase(node, &a->fixed_allocs);
614
615 return falloc;
616}
617
618/*
619 * Find the parent range - doesn't necessarily need the parent to actually exist
620 * as a buddy. Finding an existing parent comes later...
621 */
622static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
623 u64 base, u64 order,
624 u64 *pbase, u64 *porder)
625{
626 u64 base_mask;
627 u64 shifted_base = balloc_base_shift(a, base);
628
629 order++;
630 base_mask = ~((a->blk_size << order) - 1);
631
632 shifted_base &= base_mask;
633
634 *pbase = balloc_base_unshift(a, shifted_base);
635 *porder = order;
636}
637
/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 *
 * Returns the free buddy covering exactly (@base, @order), or NULL if no
 * containing free buddy could be found or a split ran out of memory.
 *
 * @a must be locked.
 */
static struct gk20a_buddy *__balloc_make_fixed_buddy(
	struct gk20a_buddy_allocator *a, u64 base, u64 order)
{
	struct gk20a_buddy *bud = NULL;
	struct list_head *order_list;
	u64 cur_order = order, cur_base = base;

	/*
	 * Algo:
	 *  1. Keep jumping up a buddy order until we find the real buddy that
	 *     this buddy exists in.
	 *  2. Then work our way down through the buddy tree until we hit a dead
	 *     end.
	 *  3. Start splitting buddies until we split to the one we need to
	 *     make.
	 */
	while (cur_order <= a->max_order) {
		int found = 0;

		/* Is a free buddy starting at cur_base on this order list? */
		order_list = balloc_get_order_list(a, cur_order);
		list_for_each_entry(bud, order_list, buddy_entry) {
			if (bud->start == cur_base) {
				found = 1;
				break;
			}
		}

		if (found)
			break;

		/* Not free at this order — try the enclosing parent range. */
		__balloc_get_parent_range(a, cur_base, cur_order,
					  &cur_base, &cur_order);
	}

	if (cur_order > a->max_order) {
		alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
		return NULL;
	}

	/* Split this buddy as necessary until we get the target buddy. */
	while (bud->start != base || bud->order != order) {
		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
			/* Undo any splits already performed before bailing. */
			balloc_coalesce(a, bud);
			return NULL;
		}

		/* Descend into whichever half contains @base. */
		if (base < bud->right->start)
			bud = bud->left;
		else
			bud = bud->right;

	}

	return bud;
}
697
/*
 * Worker for fixed allocs: carve out buddies covering [base, base + len)
 * and attach them to @falloc->buddies. Returns @base on success or 0 on
 * failure, in which case all buddies claimed so far are released again.
 *
 * NOTE(review): assumes @len >= a->blk_size so (len >> a->blk_shift) is
 * non-zero — __fls(0) is undefined. TODO: confirm callers guarantee this.
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
				   struct gk20a_fixed_alloc *falloc,
				   u64 base, u64 len)
{
	u64 shifted_base, inc_base;
	u64 align_order;

	/*
	 * Starting order: the largest order that both divides the (shifted)
	 * base and does not exceed the requested length.
	 */
	shifted_base = balloc_base_shift(a, base);
	if (shifted_base == 0)
		align_order = __fls(len >> a->blk_shift);
	else
		align_order = min_t(u64,
				    __ffs(shifted_base >> a->blk_shift),
				    __fls(len >> a->blk_shift));

	if (align_order > a->max_order) {
		alloc_dbg(balloc_owner(a),
			  "Align order too big: %llu > %llu\n",
			  align_order, a->max_order);
		return 0;
	}

	/*
	 * Generate a list of buddies that satisfy this allocation.
	 */
	inc_base = shifted_base;
	while (inc_base < (shifted_base + len)) {
		u64 order_len = balloc_order_to_len(a, align_order);
		u64 remaining;
		struct gk20a_buddy *bud;

		bud = __balloc_make_fixed_buddy(a,
					balloc_base_unshift(a, inc_base),
					align_order);
		if (!bud) {
			alloc_dbg(balloc_owner(a),
				  "Fixed buddy failed: {0x%llx, %llu}!\n",
				  balloc_base_unshift(a, inc_base),
				  align_order);
			goto err_and_cleanup;
		}

		/* Claim the buddy and remember it on the fixed alloc. */
		balloc_blist_rem(a, bud);
		balloc_alloc_buddy(a, bud);
		__balloc_buddy_list_add(a, bud, &falloc->buddies);

		/* Book keeping. */
		inc_base += order_len;
		remaining = (shifted_base + len) - inc_base;
		align_order = __ffs(inc_base >> a->blk_shift);

		/* If we don't have much left - trim down align_order. */
		if (balloc_order_to_len(a, align_order) > remaining)
			align_order = __balloc_max_order_in(a, inc_base,
							inc_base + remaining);
	}

	return base;

err_and_cleanup:
	/*
	 * Release everything claimed so far.
	 * NOTE(review): the buddy structs are freed to the cache without
	 * being returned to the free lists, while their parents may still
	 * reference them — looks leak/dangling-prone; verify against
	 * balloc_coalesce() semantics.
	 */
	while (!list_empty(&falloc->buddies)) {
		struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
							   struct gk20a_buddy,
							   buddy_entry);

		__balloc_buddy_list_rem(a, bud);
		balloc_free_buddy(a, bud->start);
		kmem_cache_free(buddy_cache, bud);
	}

	return 0;
}
770
771static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
772 struct gk20a_fixed_alloc *falloc)
773{
774 struct gk20a_buddy *bud;
775
776 while (!list_empty(&falloc->buddies)) {
777 bud = list_first_entry(&falloc->buddies,
778 struct gk20a_buddy,
779 buddy_entry);
780 __balloc_buddy_list_rem(a, bud);
781
782 balloc_free_buddy(a, bud->start);
783 balloc_blist_add(a, bud);
784 a->bytes_freed += balloc_order_to_len(a, bud->order);
785
786 /*
787 * Attemp to defrag the allocation.
788 */
789 balloc_coalesce(a, bud);
790 }
791
792 kfree(falloc);
793}
794
/*
 * Allocate memory from the passed allocator.
 *
 * @__a: generic allocator whose priv is a gk20a_buddy_allocator.
 * @len: requested length in the allocator's units.
 *
 * Returns the start address of the allocation, or 0 on failure.
 */
static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len)
{
	u64 order, addr;
	int pte_size;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	alloc_lock(__a);

	/* Round the request up to a whole buddy order. */
	order = balloc_get_order(a, len);

	if (order > a->max_order) {
		alloc_unlock(__a);
		alloc_dbg(balloc_owner(a), "Alloc fail\n");
		gk20a_alloc_trace_func_done();
		return 0;
	}

	/*
	 * For now pass the base address of the allocator's region to
	 * __get_pte_size(). This ensures we get the right page size for
	 * the alloc but we don't have to know what the real address is
	 * going to be quite yet.
	 *
	 * TODO: once userspace supports a unified address space pass 0 for
	 * the base. This will make only 'len' affect the PTE size.
	 */
	if (a->flags & GPU_BALLOC_GVA_SPACE)
		pte_size = __get_pte_size(a->vm, a->base, len);
	else
		pte_size = BALLOC_PTE_SIZE_ANY;

	addr = __balloc_do_alloc(a, order, pte_size);

	if (addr) {
		/*
		 * bytes_alloced tracks what the caller asked for;
		 * bytes_alloced_real tracks the rounded-up buddy size.
		 */
		a->bytes_alloced += len;
		a->bytes_alloced_real += balloc_order_to_len(a, order);
		alloc_dbg(balloc_owner(a),
			  "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
			  addr, order, len,
			  pte_size == gmmu_page_size_big ? "big" :
			  pte_size == gmmu_page_size_small ? "small" :
			  "NA/any");
	} else {
		alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
	}

	alloc_unlock(__a);

	gk20a_alloc_trace_func_done();
	return addr;
}
851
/*
 * Allocate a fixed address allocation. The address of the allocation is @base
 * and the length is @len. This is not a typical buddy allocator operation and
 * as such has a high possibility of failure if the address space is heavily in
 * use.
 *
 * Please do not use this function unless _absolutely_ necessary.
 *
 * Returns @base on success, 0 on failure.
 */
static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
				    u64 base, u64 len)
{
	u64 ret, real_bytes = 0;
	struct gk20a_buddy *bud;
	struct gk20a_fixed_alloc *falloc = NULL;
	struct gk20a_buddy_allocator *a = __a->priv;

	gk20a_alloc_trace_func();

	/* If base isn't aligned to an order 0 block, fail. */
	if (base & (a->blk_size - 1))
		goto fail;

	if (len == 0)
		goto fail;

	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
	if (!falloc)
		goto fail;

	INIT_LIST_HEAD(&falloc->buddies);
	falloc->start = base;
	falloc->end = base + len;

	alloc_lock(__a);
	/* The whole target range must currently be unallocated. */
	if (!balloc_is_range_free(a, base, base + len)) {
		alloc_dbg(balloc_owner(a),
			  "Range not free: 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
	if (!ret) {
		alloc_dbg(balloc_owner(a),
			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail_unlock;
	}

	balloc_alloc_fixed(a, falloc);

	/* Real footprint: the buddies may over-cover [base, base + len). */
	list_for_each_entry(bud, &falloc->buddies, buddy_entry)
		real_bytes += (bud->end - bud->start);

	a->bytes_alloced += len;
	a->bytes_alloced_real += real_bytes;

	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

	gk20a_alloc_trace_func_done();
	return base;

fail_unlock:
	alloc_unlock(__a);
fail:
	kfree(falloc); /* kfree(NULL) is a no-op, covers the early bails. */
	gk20a_alloc_trace_func_done();
	return 0;
}
922
923/*
924 * Free the passed allocation.
925 */
926static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr)
927{
928 struct gk20a_buddy *bud;
929 struct gk20a_fixed_alloc *falloc;
930 struct gk20a_buddy_allocator *a = __a->priv;
931
932 gk20a_alloc_trace_func();
933
934 if (!addr) {
935 gk20a_alloc_trace_func_done();
936 return;
937 }
938
939 alloc_lock(__a);
940
941 /*
942 * First see if this is a fixed alloc. If not fall back to a regular
943 * buddy.
944 */
945 falloc = balloc_free_fixed(a, addr);
946 if (falloc) {
947 __balloc_do_free_fixed(a, falloc);
948 goto done;
949 }
950
951 bud = balloc_free_buddy(a, addr);
952 if (!bud)
953 goto done;
954
955 balloc_blist_add(a, bud);
956 a->bytes_freed += balloc_order_to_len(a, bud->order);
957
958 /*
959 * Attemp to defrag the allocation.
960 */
961 balloc_coalesce(a, bud);
962
963done:
964 alloc_unlock(__a);
965 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
966 gk20a_alloc_trace_func_done();
967 return;
968}
969
970static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
971{
972 struct gk20a_buddy_allocator *ba = a->priv;
973
974 return ba->length;
975}
976
977static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
978{
979 struct gk20a_buddy_allocator *ba = a->priv;
980
981 return ba->start;
982}
983
984static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
985{
986 struct gk20a_buddy_allocator *ba = a->priv;
987
988 return ba->initialized;
989}
990
991static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
992{
993 struct gk20a_buddy_allocator *ba = a->priv;
994
995 return ba->end;
996}
997
/*
 * Print the buddy allocator top level stats. If you pass @s as NULL then the
 * stats are printed to the kernel log. This lets this code be used for
 * debugging purposes internal to the allocator.
 *
 * @lock: when non-zero, hold the allocator lock while reading the mutable
 *        state (per-order counters, fixed allocs, byte counters).
 */
static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
				    struct seq_file *s, int lock)
{
	int i;
	struct rb_node *node;
	struct gk20a_fixed_alloc *falloc;
	struct gk20a_buddy_allocator *a = __a->priv;

	__alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
		      a->base, a->length, a->blk_size);
	__alloc_pstat(s, __a, "Internal params:\n");
	__alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
	__alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
	__alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
	__alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
	__alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);

	__alloc_pstat(s, __a, "Buddy blocks:\n");
	__alloc_pstat(s, __a, " Order Free Alloced Split\n");
	__alloc_pstat(s, __a, " ----- ---- ------- -----\n");

	if (lock)
		alloc_lock(__a);
	/* Per-order table; orders with no activity at all are skipped. */
	for (i = a->max_order; i >= 0; i--) {
		if (a->buddy_list_len[i] == 0 &&
		    a->buddy_list_alloced[i] == 0 &&
		    a->buddy_list_split[i] == 0)
			continue;

		__alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
			      a->buddy_list_len[i],
			      a->buddy_list_alloced[i],
			      a->buddy_list_split[i]);
	}

	__alloc_pstat(s, __a, "\n");

	/*
	 * Walk the fixed allocs in address order.
	 * NOTE(review): @i is initialized to 1 but never incremented, so
	 * every fixed alloc prints as "(1)" — looks like a latent bug;
	 * confirm whether the index was meant to count entries.
	 */
	for (node = rb_first(&a->fixed_allocs), i = 1;
	     node != NULL;
	     node = rb_next(node)) {
		falloc = container_of(node,
				      struct gk20a_fixed_alloc, alloced_entry);

		__alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
			      i, falloc->start, falloc->end);
	}

	__alloc_pstat(s, __a, "\n");
	__alloc_pstat(s, __a, "Bytes allocated: %llu\n",
		      a->bytes_alloced);
	__alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
		      a->bytes_alloced_real);
	__alloc_pstat(s, __a, "Bytes freed: %llu\n",
		      a->bytes_freed);

	if (lock)
		alloc_unlock(__a);
}
1061
/*
 * Op table hooking the buddy allocator implementation into the generic
 * gk20a_allocator interface.
 */
static const struct gk20a_allocator_ops buddy_ops = {
	.alloc = gk20a_buddy_balloc,
	.free = gk20a_buddy_bfree,

	.alloc_fixed = gk20a_balloc_fixed_buddy,
	/* .free_fixed not needed: gk20a_buddy_bfree handles fixed allocs. */

	.base = gk20a_buddy_alloc_base,
	.length = gk20a_buddy_alloc_length,
	.end = gk20a_buddy_alloc_end,
	.inited = gk20a_buddy_alloc_inited,

	.fini = gk20a_buddy_allocator_destroy,

	.print_stats = gk20a_buddy_print_stats,
};
1078
/*
 * Initialize a buddy allocator. Returns 0 on success. This allocator does
 * not necessarily manage bytes. It manages distinct ranges of resources. This
 * allows the allocator to work for things like comp_tags, semaphores, etc.
 *
 * @allocator: Ptr to an allocator struct to init.
 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
 *      get PTE size for GVA spaces.
 * @name: Name of the allocator. Doesn't have to be static storage.
 * @base: The base address of the resource pool being managed.
 * @size: Number of resources in the pool.
 * @blk_size: Minimum number of resources to allocate at once. For things like
 *            semaphores this is 1. For GVA this might be as much as 64k. This
 *            corresponds to order 0. Must be power of 2.
 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
 *             will try and pick a reasonable max order.
 * @flags: Extra flags necessary. See GPU_BALLOC_*.
 */
int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
				 struct vm_gk20a *vm, const char *name,
				 u64 base, u64 size, u64 blk_size,
				 u64 max_order, u64 flags)
{
	int err;
	struct gk20a_buddy_allocator *a;

	/* blk_size must be greater than 0 and a power of 2. */
	if (blk_size == 0)
		return -EINVAL;
	if (blk_size & (blk_size - 1))
		return -EINVAL;

	if (max_order > GPU_BALLOC_MAX_ORDER)
		return -EINVAL;

	/* If this is to manage a GVA space we need a VM. */
	if (flags & GPU_BALLOC_GVA_SPACE && !vm)
		return -EINVAL;

	a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
	if (err)
		goto fail;

	a->base = base;
	a->length = size;
	a->blk_size = blk_size;
	a->blk_shift = __ffs(blk_size); /* Valid: blk_size is a power of 2. */
	a->owner = __a;

	/*
	 * If base is 0 then modify base to be the size of one block so that we
	 * can return errors by returning addr == 0.
	 */
	if (a->base == 0) {
		a->base = a->blk_size;
		a->length -= a->blk_size;
	}

	a->vm = vm;
	if (flags & GPU_BALLOC_GVA_SPACE)
		a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);

	a->flags = flags;
	a->max_order = max_order;

	balloc_allocator_align(a);
	balloc_compute_max_order(a);

	/*
	 * Shared buddy kmem_cache for all allocators.
	 * NOTE(review): this check-then-create is not serialized against
	 * concurrent allocator inits — confirm callers cannot race here.
	 */
	if (!buddy_cache)
		buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
	if (!buddy_cache) {
		err = -ENOMEM;
		goto fail;
	}

	a->alloced_buddies = RB_ROOT;
	a->fixed_allocs = RB_ROOT;
	err = balloc_init_lists(a);
	if (err)
		goto fail;

	a->initialized = 1;

	gk20a_init_alloc_debug(__a);
	alloc_dbg(__a, "New allocator: type buddy\n");
	alloc_dbg(__a, " base 0x%llx\n", a->base);
	alloc_dbg(__a, " size 0x%llx\n", a->length);
	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
	alloc_dbg(__a, " max_order %llu\n", a->max_order);
	alloc_dbg(__a, " flags 0x%llx\n", a->flags);

	return 0;

fail:
	/* NOTE(review): verify __gk20a_alloc_common_init() needs no unwind
	 * beyond freeing @a when a later step fails. */
	kfree(a);
	return err;
}
1181
1182int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name,
1183 u64 base, u64 size, u64 blk_size, u64 flags)
1184{
1185 return __gk20a_buddy_allocator_init(a, NULL, name,
1186 base, size, blk_size, 0, 0);
1187}