diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-06-27 20:46:02 -0400 |
---|---|---|
committer | Alex Waterman <alexw@nvidia.com> | 2016-07-19 14:30:45 -0400 |
commit | 5672cbdf6d8e7b8b93a08cd388097e2d1f0a8843 (patch) | |
tree | c00d0cc5c7f46ffe39c14bfdb6585716cd071bde /drivers/gpu | |
parent | b6569319c772d84087a0a1a6d7146bdcae8e9aab (diff) |
gpu: nvgpu: Move buddy allocator to new file
Move the buddy allocator implementation to a new file to make the
code more organized. Also, as part of this, commonize some macros
and functions which will be used by future allocator implementations.
Bug 1781897
Change-Id: I1611534d5d872bf3b4677f7a1cc024a94b1c437e
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1172116
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h | 190 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1216 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 200 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c | 1187 |
5 files changed, 1423 insertions, 1371 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 0fdd2e28..a9a0f92f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -53,6 +53,7 @@ nvgpu-y := \ | |||
53 | gk20a/hal.o \ | 53 | gk20a/hal.o \ |
54 | gk20a/hal_gk20a.o \ | 54 | gk20a/hal_gk20a.o \ |
55 | gk20a/gk20a_allocator.o \ | 55 | gk20a/gk20a_allocator.o \ |
56 | gk20a/gk20a_allocator_buddy.o \ | ||
56 | gk20a/cde_gk20a.o \ | 57 | gk20a/cde_gk20a.o \ |
57 | gk20a/platform_gk20a_generic.o \ | 58 | gk20a/platform_gk20a_generic.o \ |
58 | gk20a/tsg_gk20a.o \ | 59 | gk20a/tsg_gk20a.o \ |
diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h new file mode 100644 index 00000000..bed0492c --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h | |||
@@ -0,0 +1,190 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef BUDDY_ALLOCATOR_PRIV_H | ||
18 | #define BUDDY_ALLOCATOR_PRIV_H | ||
19 | |||
20 | #include <linux/list.h> | ||
21 | #include <linux/rbtree.h> | ||
22 | |||
23 | struct gk20a_allocator; | ||
24 | struct vm_gk20a; | ||
25 | |||
26 | /* | ||
27 | * Each buddy is an element in a binary tree. | ||
28 | */ | ||
29 | struct gk20a_buddy { | ||
30 | struct gk20a_buddy *parent; /* Parent node. */ | ||
31 | struct gk20a_buddy *buddy; /* This node's buddy. */ | ||
32 | struct gk20a_buddy *left; /* Lower address sub-node. */ | ||
33 | struct gk20a_buddy *right; /* Higher address sub-node. */ | ||
34 | |||
35 | struct list_head buddy_entry; /* List entry for various lists. */ | ||
36 | struct rb_node alloced_entry; /* RB tree of allocations. */ | ||
37 | |||
38 | u64 start; /* Start address of this buddy. */ | ||
39 | u64 end; /* End address of this buddy. */ | ||
40 | u64 order; /* Buddy order. */ | ||
41 | |||
42 | #define BALLOC_BUDDY_ALLOCED 0x1 | ||
43 | #define BALLOC_BUDDY_SPLIT 0x2 | ||
44 | #define BALLOC_BUDDY_IN_LIST 0x4 | ||
45 | int flags; /* List of associated flags. */ | ||
46 | |||
47 | /* | ||
48 | * Size of the PDE this buddy is using. This allows for grouping like | ||
49 | * sized allocations into the same PDE. | ||
50 | */ | ||
51 | #define BALLOC_PTE_SIZE_ANY 0x0 | ||
52 | #define BALLOC_PTE_SIZE_SMALL 0x1 | ||
53 | #define BALLOC_PTE_SIZE_BIG 0x2 | ||
54 | int pte_size; | ||
55 | }; | ||
56 | |||
57 | #define __buddy_flag_ops(flag, flag_up) \ | ||
58 | static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ | ||
59 | { \ | ||
60 | return b->flags & BALLOC_BUDDY_ ## flag_up; \ | ||
61 | } \ | ||
62 | static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ | ||
63 | { \ | ||
64 | b->flags |= BALLOC_BUDDY_ ## flag_up; \ | ||
65 | } \ | ||
66 | static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ | ||
67 | { \ | ||
68 | b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * int buddy_is_alloced(struct gk20a_buddy *b); | ||
73 | * void buddy_set_alloced(struct gk20a_buddy *b); | ||
74 | * void buddy_clr_alloced(struct gk20a_buddy *b); | ||
75 | * | ||
76 | * int buddy_is_split(struct gk20a_buddy *b); | ||
77 | * void buddy_set_split(struct gk20a_buddy *b); | ||
78 | * void buddy_clr_split(struct gk20a_buddy *b); | ||
79 | * | ||
80 | * int buddy_is_in_list(struct gk20a_buddy *b); | ||
81 | * void buddy_set_in_list(struct gk20a_buddy *b); | ||
82 | * void buddy_clr_in_list(struct gk20a_buddy *b); | ||
83 | */ | ||
84 | __buddy_flag_ops(alloced, ALLOCED); | ||
85 | __buddy_flag_ops(split, SPLIT); | ||
86 | __buddy_flag_ops(in_list, IN_LIST); | ||
87 | |||
88 | /* | ||
89 | * Keeps info for a fixed allocation. | ||
90 | */ | ||
91 | struct gk20a_fixed_alloc { | ||
92 | struct list_head buddies; /* List of buddies. */ | ||
93 | struct rb_node alloced_entry; /* RB tree of fixed allocations. */ | ||
94 | |||
95 | u64 start; /* Start of fixed block. */ | ||
96 | u64 end; /* End address. */ | ||
97 | }; | ||
98 | |||
99 | /* | ||
100 | * GPU buddy allocator for the various GPU address spaces. Each addressable unit | ||
101 | * doesn't have to correspond to a byte. In some cases each unit is a more | ||
102 | * complex object such as a comp_tag line or the like. | ||
103 | * | ||
104 | * The max order is computed based on the size of the minimum order and the size | ||
105 | * of the address space. | ||
106 | * | ||
107 | * order_size is the size of an order 0 buddy. | ||
108 | */ | ||
109 | struct gk20a_buddy_allocator { | ||
110 | struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ | ||
111 | struct vm_gk20a *vm; /* Parent VM - can be NULL. */ | ||
112 | |||
113 | u64 base; /* Base address of the space. */ | ||
114 | u64 length; /* Length of the space. */ | ||
115 | u64 blk_size; /* Size of order 0 allocation. */ | ||
116 | u64 blk_shift; /* Shift to divide by blk_size. */ | ||
117 | |||
118 | /* Internal stuff. */ | ||
119 | u64 start; /* Real start (aligned to blk_size). */ | ||
120 | u64 end; /* Real end, trimmed if needed. */ | ||
121 | u64 count; /* Count of objects in space. */ | ||
122 | u64 blks; /* Count of blks in the space. */ | ||
123 | u64 max_order; /* Specific maximum order. */ | ||
124 | |||
125 | struct rb_root alloced_buddies; /* Outstanding allocations. */ | ||
126 | struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ | ||
127 | |||
128 | /* | ||
129 | * Impose an upper bound on the maximum order. | ||
130 | */ | ||
131 | #define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) | ||
132 | |||
133 | struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; | ||
134 | u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; | ||
135 | u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; | ||
136 | u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; | ||
137 | |||
138 | /* | ||
139 | * This is for when the allocator is managing a GVA space (the | ||
140 | * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires | ||
141 | * that we group like sized allocations into PDE blocks. | ||
142 | */ | ||
143 | u64 pte_blk_order; | ||
144 | |||
145 | int initialized; | ||
146 | |||
147 | u64 flags; | ||
148 | |||
149 | u64 bytes_alloced; | ||
150 | u64 bytes_alloced_real; | ||
151 | u64 bytes_freed; | ||
152 | }; | ||
153 | |||
154 | static inline struct gk20a_buddy_allocator *buddy_allocator( | ||
155 | struct gk20a_allocator *a) | ||
156 | { | ||
157 | return (struct gk20a_buddy_allocator *)(a)->priv; | ||
158 | } | ||
159 | |||
160 | static inline struct list_head *balloc_get_order_list( | ||
161 | struct gk20a_buddy_allocator *a, int order) | ||
162 | { | ||
163 | return &a->buddy_list[order]; | ||
164 | } | ||
165 | |||
166 | static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, | ||
167 | int order) | ||
168 | { | ||
169 | return (1 << order) * a->blk_size; | ||
170 | } | ||
171 | |||
172 | static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, | ||
173 | u64 base) | ||
174 | { | ||
175 | return base - a->start; | ||
176 | } | ||
177 | |||
178 | static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, | ||
179 | u64 base) | ||
180 | { | ||
181 | return base + a->start; | ||
182 | } | ||
183 | |||
184 | static inline struct gk20a_allocator *balloc_owner( | ||
185 | struct gk20a_buddy_allocator *a) | ||
186 | { | ||
187 | return a->owner; | ||
188 | } | ||
189 | |||
190 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index f2164768..b7e9a5e4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | |||
@@ -19,125 +19,13 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | 21 | ||
22 | #include "mm_gk20a.h" | ||
22 | #include "platform_gk20a.h" | 23 | #include "platform_gk20a.h" |
23 | #include "gk20a_allocator.h" | 24 | #include "gk20a_allocator.h" |
24 | 25 | ||
25 | #include "mm_gk20a.h" | ||
26 | |||
27 | static struct dentry *gk20a_alloc_debugfs_root; | ||
28 | |||
29 | static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ | ||
30 | |||
31 | u32 gk20a_alloc_tracing_on; | 26 | u32 gk20a_alloc_tracing_on; |
32 | 27 | ||
33 | #define gk20a_alloc_trace_func() \ | 28 | static struct dentry *gk20a_alloc_debugfs_root; |
34 | do { \ | ||
35 | if (gk20a_alloc_tracing_on) \ | ||
36 | trace_printk("%s\n", __func__); \ | ||
37 | } while (0) | ||
38 | |||
39 | #define gk20a_alloc_trace_func_done() \ | ||
40 | do { \ | ||
41 | if (gk20a_alloc_tracing_on) \ | ||
42 | trace_printk("%s_done\n", __func__); \ | ||
43 | } while (0) | ||
44 | |||
45 | /* | ||
46 | * Buddy allocator implementation. | ||
47 | */ | ||
48 | static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len); | ||
49 | static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr); | ||
50 | static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, | ||
51 | u64 base, u64 len); | ||
52 | static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a); | ||
53 | static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a); | ||
54 | static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a); | ||
55 | static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a); | ||
56 | |||
57 | static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a); | ||
58 | static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, | ||
59 | struct seq_file *s, int lock); | ||
60 | |||
61 | /* Some other buddy allocator functions. */ | ||
62 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
63 | u64 addr); | ||
64 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
65 | struct gk20a_buddy *b); | ||
66 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
67 | struct gk20a_fixed_alloc *falloc); | ||
68 | |||
69 | /* Debugging. */ | ||
70 | static void gk20a_init_alloc_debug(struct gk20a_allocator *a); | ||
71 | |||
72 | /* | ||
73 | * This function is not present in older kernel's list.h code. | ||
74 | */ | ||
75 | #ifndef list_last_entry | ||
76 | #define list_last_entry(ptr, type, member) \ | ||
77 | list_entry((ptr)->prev, type, member) | ||
78 | #endif | ||
79 | |||
80 | static const struct gk20a_allocator_ops buddy_ops = { | ||
81 | .alloc = gk20a_buddy_alloc, | ||
82 | .free = gk20a_buddy_free, | ||
83 | |||
84 | .alloc_fixed = gk20a_buddy_alloc_fixed, | ||
85 | /* .free_fixed not needed. */ | ||
86 | |||
87 | .base = gk20a_buddy_alloc_base, | ||
88 | .length = gk20a_buddy_alloc_length, | ||
89 | .end = gk20a_buddy_alloc_end, | ||
90 | .inited = gk20a_buddy_alloc_inited, | ||
91 | |||
92 | .fini = gk20a_buddy_allocator_destroy, | ||
93 | |||
94 | .print_stats = gk20a_buddy_print_stats, | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * GPU buddy allocator for various address spaces. | ||
99 | * | ||
100 | * Current limitations: | ||
101 | * o A fixed allocation could potentially be made that borders PDEs with | ||
102 | * different PTE sizes. This would require that fixed buffer to have | ||
103 | * different sized PTEs for different parts of the allocation. Probably | ||
104 | * best to just require PDE alignment for fixed address allocs. | ||
105 | * | ||
106 | * o It is currently possible to make an allocator that has a buddy alignment | ||
107 | * out of sync with the PDE block size alignment. A simple example is a | ||
108 | * 32GB address space starting at byte 1. Every buddy is shifted off by 1 | ||
109 | * which means each buddy corresponf to more than one actual GPU page. The | ||
110 | * best way to fix this is probably just require PDE blocksize alignment | ||
111 | * for the start of the address space. At the moment all allocators are | ||
112 | * easily PDE aligned so this hasn't been a problem. | ||
113 | */ | ||
114 | |||
115 | static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) | ||
116 | { | ||
117 | struct gk20a_buddy_allocator *ba = a->priv; | ||
118 | |||
119 | return ba->length; | ||
120 | } | ||
121 | |||
122 | static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) | ||
123 | { | ||
124 | struct gk20a_buddy_allocator *ba = a->priv; | ||
125 | |||
126 | return ba->start; | ||
127 | } | ||
128 | |||
129 | static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) | ||
130 | { | ||
131 | struct gk20a_buddy_allocator *ba = a->priv; | ||
132 | |||
133 | return ba->inited; | ||
134 | } | ||
135 | static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) | ||
136 | { | ||
137 | struct gk20a_buddy_allocator *ba = a->priv; | ||
138 | |||
139 | return ba->end; | ||
140 | } | ||
141 | 29 | ||
142 | u64 gk20a_alloc_length(struct gk20a_allocator *a) | 30 | u64 gk20a_alloc_length(struct gk20a_allocator *a) |
143 | { | 31 | { |
@@ -195,189 +83,11 @@ void gk20a_alloc_destroy(struct gk20a_allocator *a) | |||
195 | } | 83 | } |
196 | 84 | ||
197 | /* | 85 | /* |
198 | * Pick a suitable maximum order for this allocator. | ||
199 | * | ||
200 | * Hueristic: Just guessing that the best max order is the largest single | ||
201 | * block that will fit in the address space. | ||
202 | */ | ||
203 | static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) | ||
204 | { | ||
205 | u64 true_max_order = ilog2(a->blks); | ||
206 | |||
207 | if (a->max_order == 0) { | ||
208 | a->max_order = true_max_order; | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | if (a->max_order > true_max_order) | ||
213 | a->max_order = true_max_order; | ||
214 | if (a->max_order > GPU_BALLOC_MAX_ORDER) | ||
215 | a->max_order = GPU_BALLOC_MAX_ORDER; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Since we can only allocate in chucks of a->blk_size we need to trim off | ||
220 | * any excess data that is not aligned to a->blk_size. | ||
221 | */ | ||
222 | static void balloc_allocator_align(struct gk20a_buddy_allocator *a) | ||
223 | { | ||
224 | a->start = ALIGN(a->base, a->blk_size); | ||
225 | WARN_ON(a->start != a->base); | ||
226 | a->end = (a->base + a->length) & ~(a->blk_size - 1); | ||
227 | a->count = a->end - a->start; | ||
228 | a->blks = a->count >> a->blk_shift; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Pass NULL for parent if you want a top level buddy. | ||
233 | */ | ||
234 | static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, | ||
235 | struct gk20a_buddy *parent, | ||
236 | u64 start, u64 order) | ||
237 | { | ||
238 | struct gk20a_buddy *new_buddy; | ||
239 | |||
240 | new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); | ||
241 | if (!new_buddy) | ||
242 | return NULL; | ||
243 | |||
244 | memset(new_buddy, 0, sizeof(struct gk20a_buddy)); | ||
245 | |||
246 | new_buddy->parent = parent; | ||
247 | new_buddy->start = start; | ||
248 | new_buddy->order = order; | ||
249 | new_buddy->end = start + (1 << order) * a->blk_size; | ||
250 | |||
251 | return new_buddy; | ||
252 | } | ||
253 | |||
254 | static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, | ||
255 | struct gk20a_buddy *b, | ||
256 | struct list_head *list) | ||
257 | { | ||
258 | if (buddy_is_in_list(b)) { | ||
259 | alloc_dbg(balloc_owner(a), | ||
260 | "Oops: adding added buddy (%llu:0x%llx)\n", | ||
261 | b->order, b->start); | ||
262 | BUG(); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Add big PTE blocks to the tail, small to the head for GVA spaces. | ||
267 | * This lets the code that checks if there are available blocks check | ||
268 | * without cycling through the entire list. | ||
269 | */ | ||
270 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
271 | b->pte_size == BALLOC_PTE_SIZE_BIG) | ||
272 | list_add_tail(&b->buddy_entry, list); | ||
273 | else | ||
274 | list_add(&b->buddy_entry, list); | ||
275 | |||
276 | buddy_set_in_list(b); | ||
277 | } | ||
278 | |||
279 | static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, | ||
280 | struct gk20a_buddy *b) | ||
281 | { | ||
282 | if (!buddy_is_in_list(b)) { | ||
283 | alloc_dbg(balloc_owner(a), | ||
284 | "Oops: removing removed buddy (%llu:0x%llx)\n", | ||
285 | b->order, b->start); | ||
286 | BUG(); | ||
287 | } | ||
288 | |||
289 | list_del_init(&b->buddy_entry); | ||
290 | buddy_clr_in_list(b); | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * Add a buddy to one of the buddy lists and deal with the necessary | ||
295 | * book keeping. Adds the buddy to the list specified by the buddy's order. | ||
296 | */ | ||
297 | static void balloc_blist_add(struct gk20a_buddy_allocator *a, | ||
298 | struct gk20a_buddy *b) | ||
299 | { | ||
300 | __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); | ||
301 | a->buddy_list_len[b->order]++; | ||
302 | } | ||
303 | |||
304 | static void balloc_blist_rem(struct gk20a_buddy_allocator *a, | ||
305 | struct gk20a_buddy *b) | ||
306 | { | ||
307 | __balloc_buddy_list_rem(a, b); | ||
308 | a->buddy_list_len[b->order]--; | ||
309 | } | ||
310 | |||
311 | static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) | ||
312 | { | ||
313 | if (len == 0) | ||
314 | return 0; | ||
315 | |||
316 | len--; | ||
317 | len >>= a->blk_shift; | ||
318 | |||
319 | return fls(len); | ||
320 | } | ||
321 | |||
322 | static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, | ||
323 | u64 start, u64 end) | ||
324 | { | ||
325 | u64 size = (end - start) >> a->blk_shift; | ||
326 | |||
327 | if (size > 0) | ||
328 | return min_t(u64, ilog2(size), a->max_order); | ||
329 | else | ||
330 | return GPU_BALLOC_MAX_ORDER; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Initialize the buddy lists. | ||
335 | */ | ||
336 | static int balloc_init_lists(struct gk20a_buddy_allocator *a) | ||
337 | { | ||
338 | int i; | ||
339 | u64 bstart, bend, order; | ||
340 | struct gk20a_buddy *buddy; | ||
341 | |||
342 | bstart = a->start; | ||
343 | bend = a->end; | ||
344 | |||
345 | /* First make sure the LLs are valid. */ | ||
346 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) | ||
347 | INIT_LIST_HEAD(balloc_get_order_list(a, i)); | ||
348 | |||
349 | while (bstart < bend) { | ||
350 | order = __balloc_max_order_in(a, bstart, bend); | ||
351 | |||
352 | buddy = balloc_new_buddy(a, NULL, bstart, order); | ||
353 | if (!buddy) | ||
354 | goto cleanup; | ||
355 | |||
356 | balloc_blist_add(a, buddy); | ||
357 | bstart += balloc_order_to_len(a, order); | ||
358 | } | ||
359 | |||
360 | return 0; | ||
361 | |||
362 | cleanup: | ||
363 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
364 | if (!list_empty(balloc_get_order_list(a, i))) { | ||
365 | buddy = list_first_entry(balloc_get_order_list(a, i), | ||
366 | struct gk20a_buddy, buddy_entry); | ||
367 | balloc_blist_rem(a, buddy); | ||
368 | kmem_cache_free(buddy_cache, buddy); | ||
369 | } | ||
370 | } | ||
371 | |||
372 | return -ENOMEM; | ||
373 | } | ||
374 | |||
375 | /* | ||
376 | * Handle the common init stuff for a gk20a_allocator. | 86 | * Handle the common init stuff for a gk20a_allocator. |
377 | */ | 87 | */ |
378 | static int __gk20a_alloc_common_init(struct gk20a_allocator *a, | 88 | int __gk20a_alloc_common_init(struct gk20a_allocator *a, |
379 | const char *name, void *priv, | 89 | const char *name, void *priv, |
380 | const struct gk20a_allocator_ops *ops) | 90 | const struct gk20a_allocator_ops *ops) |
381 | { | 91 | { |
382 | if (!ops) | 92 | if (!ops) |
383 | return -EINVAL; | 93 | return -EINVAL; |
@@ -392,909 +102,6 @@ static int __gk20a_alloc_common_init(struct gk20a_allocator *a, | |||
392 | return 0; | 102 | return 0; |
393 | } | 103 | } |
394 | 104 | ||
395 | /* | ||
396 | * Initialize a buddy allocator. Returns 0 on success. This allocator does | ||
397 | * not necessarily manage bytes. It manages distinct ranges of resources. This | ||
398 | * allows the allocator to work for things like comp_tags, semaphores, etc. | ||
399 | * | ||
400 | * @allocator: Ptr to an allocator struct to init. | ||
401 | * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to | ||
402 | * get PTE size for GVA spaces. | ||
403 | * @name: Name of the allocator. Doesn't have to be static storage. | ||
404 | * @base: The base address of the resource pool being managed. | ||
405 | * @size: Number of resources in the pool. | ||
406 | * @blk_size: Minimum number of resources to allocate at once. For things like | ||
407 | * semaphores this is 1. For GVA this might be as much as 64k. This | ||
408 | * corresponds to order 0. Must be power of 2. | ||
409 | * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator | ||
410 | * will try and pick a reasonable max order. | ||
411 | * @flags: Extra flags necessary. See GPU_BALLOC_*. | ||
412 | */ | ||
413 | int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a, | ||
414 | struct vm_gk20a *vm, const char *name, | ||
415 | u64 base, u64 size, u64 blk_size, | ||
416 | u64 max_order, u64 flags) | ||
417 | { | ||
418 | int err; | ||
419 | struct gk20a_buddy_allocator *a; | ||
420 | |||
421 | /* blk_size must be greater than 0 and a power of 2. */ | ||
422 | if (blk_size == 0) | ||
423 | return -EINVAL; | ||
424 | if (blk_size & (blk_size - 1)) | ||
425 | return -EINVAL; | ||
426 | |||
427 | if (max_order > GPU_BALLOC_MAX_ORDER) | ||
428 | return -EINVAL; | ||
429 | |||
430 | /* If this is to manage a GVA space we need a VM. */ | ||
431 | if (flags & GPU_BALLOC_GVA_SPACE && !vm) | ||
432 | return -EINVAL; | ||
433 | |||
434 | a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); | ||
435 | if (!a) | ||
436 | return -ENOMEM; | ||
437 | |||
438 | err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops); | ||
439 | if (err) | ||
440 | goto fail; | ||
441 | |||
442 | a->base = base; | ||
443 | a->length = size; | ||
444 | a->blk_size = blk_size; | ||
445 | a->blk_shift = __ffs(blk_size); | ||
446 | a->owner = __a; | ||
447 | |||
448 | /* | ||
449 | * If base is 0 then modfy base to be the size of one block so that we | ||
450 | * can return errors by returning addr == 0. | ||
451 | */ | ||
452 | if (a->base == 0) { | ||
453 | a->base = a->blk_size; | ||
454 | a->length -= a->blk_size; | ||
455 | } | ||
456 | |||
457 | a->vm = vm; | ||
458 | if (flags & GPU_BALLOC_GVA_SPACE) | ||
459 | a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); | ||
460 | |||
461 | a->flags = flags; | ||
462 | a->max_order = max_order; | ||
463 | |||
464 | balloc_allocator_align(a); | ||
465 | balloc_compute_max_order(a); | ||
466 | |||
467 | /* Shared buddy kmem_cache for all allocators. */ | ||
468 | if (!buddy_cache) | ||
469 | buddy_cache = KMEM_CACHE(gk20a_buddy, 0); | ||
470 | if (!buddy_cache) { | ||
471 | err = -ENOMEM; | ||
472 | goto fail; | ||
473 | } | ||
474 | |||
475 | a->alloced_buddies = RB_ROOT; | ||
476 | a->fixed_allocs = RB_ROOT; | ||
477 | err = balloc_init_lists(a); | ||
478 | if (err) | ||
479 | goto fail; | ||
480 | |||
481 | a->inited = 1; | ||
482 | |||
483 | gk20a_init_alloc_debug(__a); | ||
484 | alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base); | ||
485 | alloc_dbg(__a, " size 0x%llx\n", a->length); | ||
486 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
487 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | ||
488 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
489 | |||
490 | return 0; | ||
491 | |||
492 | fail: | ||
493 | kfree(a); | ||
494 | return err; | ||
495 | } | ||
496 | |||
497 | int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, | ||
498 | u64 base, u64 size, u64 blk_size, u64 flags) | ||
499 | { | ||
500 | return __gk20a_buddy_allocator_init(a, NULL, name, | ||
501 | base, size, blk_size, 0, 0); | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Clean up and destroy the passed allocator. | ||
506 | */ | ||
507 | static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) | ||
508 | { | ||
509 | int i; | ||
510 | struct rb_node *node; | ||
511 | struct gk20a_buddy *bud; | ||
512 | struct gk20a_fixed_alloc *falloc; | ||
513 | struct gk20a_buddy_allocator *a = __a->priv; | ||
514 | |||
515 | alloc_lock(__a); | ||
516 | |||
517 | if (!IS_ERR_OR_NULL(__a->debugfs_entry)) | ||
518 | debugfs_remove(__a->debugfs_entry); | ||
519 | |||
520 | /* | ||
521 | * Free the fixed allocs first. | ||
522 | */ | ||
523 | while ((node = rb_first(&a->fixed_allocs)) != NULL) { | ||
524 | falloc = container_of(node, | ||
525 | struct gk20a_fixed_alloc, alloced_entry); | ||
526 | |||
527 | rb_erase(node, &a->fixed_allocs); | ||
528 | __balloc_do_free_fixed(a, falloc); | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * And now free all outstanding allocations. | ||
533 | */ | ||
534 | while ((node = rb_first(&a->alloced_buddies)) != NULL) { | ||
535 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
536 | balloc_free_buddy(a, bud->start); | ||
537 | balloc_blist_add(a, bud); | ||
538 | balloc_coalesce(a, bud); | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * Now clean up the unallocated buddies. | ||
543 | */ | ||
544 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
545 | BUG_ON(a->buddy_list_alloced[i] != 0); | ||
546 | |||
547 | while (!list_empty(balloc_get_order_list(a, i))) { | ||
548 | bud = list_first_entry(balloc_get_order_list(a, i), | ||
549 | struct gk20a_buddy, buddy_entry); | ||
550 | balloc_blist_rem(a, bud); | ||
551 | kmem_cache_free(buddy_cache, bud); | ||
552 | } | ||
553 | |||
554 | if (a->buddy_list_len[i] != 0) { | ||
555 | pr_info("Excess buddies!!! (%d: %llu)\n", | ||
556 | i, a->buddy_list_len[i]); | ||
557 | BUG(); | ||
558 | } | ||
559 | if (a->buddy_list_split[i] != 0) { | ||
560 | pr_info("Excess split nodes!!! (%d: %llu)\n", | ||
561 | i, a->buddy_list_split[i]); | ||
562 | BUG(); | ||
563 | } | ||
564 | if (a->buddy_list_alloced[i] != 0) { | ||
565 | pr_info("Excess alloced nodes!!! (%d: %llu)\n", | ||
566 | i, a->buddy_list_alloced[i]); | ||
567 | BUG(); | ||
568 | } | ||
569 | } | ||
570 | |||
571 | kfree(a); | ||
572 | |||
573 | alloc_unlock(__a); | ||
574 | } | ||
575 | |||
576 | /* | ||
577 | * Combine the passed buddy if possible. The pointer in @b may not be valid | ||
578 | * after this as the buddy may be freed. | ||
579 | * | ||
580 | * @a must be locked. | ||
581 | */ | ||
582 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
583 | struct gk20a_buddy *b) | ||
584 | { | ||
585 | struct gk20a_buddy *parent; | ||
586 | |||
587 | if (buddy_is_alloced(b) || buddy_is_split(b)) | ||
588 | return; | ||
589 | |||
590 | /* | ||
591 | * If both our buddy and I are both not allocated and not split then | ||
592 | * we can coalesce ourselves. | ||
593 | */ | ||
594 | if (!b->buddy) | ||
595 | return; | ||
596 | if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) | ||
597 | return; | ||
598 | |||
599 | parent = b->parent; | ||
600 | |||
601 | balloc_blist_rem(a, b); | ||
602 | balloc_blist_rem(a, b->buddy); | ||
603 | |||
604 | buddy_clr_split(parent); | ||
605 | a->buddy_list_split[parent->order]--; | ||
606 | balloc_blist_add(a, parent); | ||
607 | |||
608 | /* | ||
609 | * Recursively coalesce as far as we can go. | ||
610 | */ | ||
611 | balloc_coalesce(a, parent); | ||
612 | |||
613 | /* Clean up the remains. */ | ||
614 | kmem_cache_free(buddy_cache, b->buddy); | ||
615 | kmem_cache_free(buddy_cache, b); | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. | ||
620 | * | ||
621 | * @a must be locked. | ||
622 | */ | ||
623 | static int balloc_split_buddy(struct gk20a_buddy_allocator *a, | ||
624 | struct gk20a_buddy *b, int pte_size) | ||
625 | { | ||
626 | struct gk20a_buddy *left, *right; | ||
627 | u64 half; | ||
628 | |||
629 | left = balloc_new_buddy(a, b, b->start, b->order - 1); | ||
630 | if (!left) | ||
631 | return -ENOMEM; | ||
632 | |||
633 | half = (b->end - b->start) / 2; | ||
634 | |||
635 | right = balloc_new_buddy(a, b, b->start + half, b->order - 1); | ||
636 | if (!right) { | ||
637 | kmem_cache_free(buddy_cache, left); | ||
638 | return -ENOMEM; | ||
639 | } | ||
640 | |||
641 | buddy_set_split(b); | ||
642 | a->buddy_list_split[b->order]++; | ||
643 | |||
644 | b->left = left; | ||
645 | b->right = right; | ||
646 | left->buddy = right; | ||
647 | right->buddy = left; | ||
648 | left->parent = b; | ||
649 | right->parent = b; | ||
650 | |||
651 | /* PTE considerations. */ | ||
652 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
653 | left->order <= a->pte_blk_order) { | ||
654 | left->pte_size = pte_size; | ||
655 | right->pte_size = pte_size; | ||
656 | } | ||
657 | |||
658 | balloc_blist_rem(a, b); | ||
659 | balloc_blist_add(a, left); | ||
660 | balloc_blist_add(a, right); | ||
661 | |||
662 | return 0; | ||
663 | } | ||
664 | |||
665 | /* | ||
666 | * Place the passed buddy into the RB tree for allocated buddies. Never fails | ||
667 | * unless the passed entry is a duplicate which is a bug. | ||
668 | * | ||
669 | * @a must be locked. | ||
670 | */ | ||
671 | static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, | ||
672 | struct gk20a_buddy *b) | ||
673 | { | ||
674 | struct rb_node **new = &(a->alloced_buddies.rb_node); | ||
675 | struct rb_node *parent = NULL; | ||
676 | |||
677 | while (*new) { | ||
678 | struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, | ||
679 | alloced_entry); | ||
680 | |||
681 | parent = *new; | ||
682 | if (b->start < bud->start) | ||
683 | new = &((*new)->rb_left); | ||
684 | else if (b->start > bud->start) | ||
685 | new = &((*new)->rb_right); | ||
686 | else | ||
687 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
688 | } | ||
689 | |||
690 | rb_link_node(&b->alloced_entry, parent, new); | ||
691 | rb_insert_color(&b->alloced_entry, &a->alloced_buddies); | ||
692 | |||
693 | buddy_set_alloced(b); | ||
694 | a->buddy_list_alloced[b->order]++; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
699 | * deallocated buddy for further processing. | ||
700 | * | ||
701 | * @a must be locked. | ||
702 | */ | ||
703 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
704 | u64 addr) | ||
705 | { | ||
706 | struct rb_node *node = a->alloced_buddies.rb_node; | ||
707 | struct gk20a_buddy *bud; | ||
708 | |||
709 | while (node) { | ||
710 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
711 | |||
712 | if (addr < bud->start) | ||
713 | node = node->rb_left; | ||
714 | else if (addr > bud->start) | ||
715 | node = node->rb_right; | ||
716 | else | ||
717 | break; | ||
718 | } | ||
719 | |||
720 | if (!node) | ||
721 | return NULL; | ||
722 | |||
723 | rb_erase(node, &a->alloced_buddies); | ||
724 | buddy_clr_alloced(bud); | ||
725 | a->buddy_list_alloced[bud->order]--; | ||
726 | |||
727 | return bud; | ||
728 | } | ||
729 | |||
730 | /* | ||
731 | * Find a suitable buddy for the given order and PTE type (big or little). | ||
732 | */ | ||
733 | static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, | ||
734 | u64 order, int pte_size) | ||
735 | { | ||
736 | struct gk20a_buddy *bud; | ||
737 | |||
738 | if (order > a->max_order || | ||
739 | list_empty(balloc_get_order_list(a, order))) | ||
740 | return NULL; | ||
741 | |||
742 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
743 | pte_size == BALLOC_PTE_SIZE_BIG) | ||
744 | bud = list_last_entry(balloc_get_order_list(a, order), | ||
745 | struct gk20a_buddy, buddy_entry); | ||
746 | else | ||
747 | bud = list_first_entry(balloc_get_order_list(a, order), | ||
748 | struct gk20a_buddy, buddy_entry); | ||
749 | |||
750 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
751 | bud->pte_size != pte_size) | ||
752 | return NULL; | ||
753 | |||
754 | return bud; | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * Allocate a suitably sized buddy. If no suitable buddy exists split higher | ||
759 | * order buddies until we have a suitable buddy to allocate. | ||
760 | * | ||
761 | * For PDE grouping add an extra check to see if a buddy is suitable: that the | ||
762 | * buddy exists in a PDE who's PTE size is reasonable | ||
763 | * | ||
764 | * @a must be locked. | ||
765 | */ | ||
766 | static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, | ||
767 | u64 order, int pte_size) | ||
768 | { | ||
769 | u64 split_order; | ||
770 | struct gk20a_buddy *bud = NULL; | ||
771 | |||
772 | split_order = order; | ||
773 | while (split_order <= a->max_order && | ||
774 | !(bud = __balloc_find_buddy(a, split_order, pte_size))) | ||
775 | split_order++; | ||
776 | |||
777 | /* Out of memory! */ | ||
778 | if (!bud) | ||
779 | return 0; | ||
780 | |||
781 | while (bud->order != order) { | ||
782 | if (balloc_split_buddy(a, bud, pte_size)) | ||
783 | return 0; /* No mem... */ | ||
784 | bud = bud->left; | ||
785 | } | ||
786 | |||
787 | balloc_blist_rem(a, bud); | ||
788 | balloc_alloc_buddy(a, bud); | ||
789 | |||
790 | return bud->start; | ||
791 | } | ||
792 | |||
793 | /* | ||
794 | * Allocate memory from the passed allocator. | ||
795 | */ | ||
796 | static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len) | ||
797 | { | ||
798 | u64 order, addr; | ||
799 | int pte_size; | ||
800 | struct gk20a_buddy_allocator *a = __a->priv; | ||
801 | |||
802 | gk20a_alloc_trace_func(); | ||
803 | |||
804 | alloc_lock(__a); | ||
805 | |||
806 | order = balloc_get_order(a, len); | ||
807 | |||
808 | if (order > a->max_order) { | ||
809 | alloc_unlock(__a); | ||
810 | alloc_dbg(balloc_owner(a), "Alloc fail\n"); | ||
811 | gk20a_alloc_trace_func_done(); | ||
812 | return 0; | ||
813 | } | ||
814 | |||
815 | /* | ||
816 | * For now pass the base address of the allocator's region to | ||
817 | * __get_pte_size(). This ensures we get the right page size for | ||
818 | * the alloc but we don't have to know what the real address is | ||
819 | * going to be quite yet. | ||
820 | * | ||
821 | * TODO: once userspace supports a unified address space pass 0 for | ||
822 | * the base. This will make only 'len' affect the PTE size. | ||
823 | */ | ||
824 | if (a->flags & GPU_BALLOC_GVA_SPACE) | ||
825 | pte_size = __get_pte_size(a->vm, a->base, len); | ||
826 | else | ||
827 | pte_size = BALLOC_PTE_SIZE_ANY; | ||
828 | |||
829 | addr = __balloc_do_alloc(a, order, pte_size); | ||
830 | |||
831 | if (addr) { | ||
832 | a->bytes_alloced += len; | ||
833 | a->bytes_alloced_real += balloc_order_to_len(a, order); | ||
834 | alloc_dbg(balloc_owner(a), | ||
835 | "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", | ||
836 | addr, order, len, | ||
837 | pte_size == gmmu_page_size_big ? "big" : | ||
838 | pte_size == gmmu_page_size_small ? "small" : | ||
839 | "NA/any"); | ||
840 | } else { | ||
841 | alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); | ||
842 | } | ||
843 | |||
844 | alloc_unlock(__a); | ||
845 | |||
846 | gk20a_alloc_trace_func_done(); | ||
847 | return addr; | ||
848 | } | ||
849 | |||
850 | /* | ||
851 | * See if the passed range is actually available for allocation. If so, then | ||
852 | * return 1, otherwise return 0. | ||
853 | * | ||
854 | * TODO: Right now this uses the unoptimal approach of going through all | ||
855 | * outstanding allocations and checking their base/ends. This could be better. | ||
856 | */ | ||
857 | static int balloc_is_range_free(struct gk20a_buddy_allocator *a, | ||
858 | u64 base, u64 end) | ||
859 | { | ||
860 | struct rb_node *node; | ||
861 | struct gk20a_buddy *bud; | ||
862 | |||
863 | node = rb_first(&a->alloced_buddies); | ||
864 | if (!node) | ||
865 | return 1; /* No allocs yet. */ | ||
866 | |||
867 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
868 | |||
869 | while (bud->start < end) { | ||
870 | if ((bud->start > base && bud->start < end) || | ||
871 | (bud->end > base && bud->end < end)) | ||
872 | return 0; | ||
873 | |||
874 | node = rb_next(node); | ||
875 | if (!node) | ||
876 | break; | ||
877 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
878 | } | ||
879 | |||
880 | return 1; | ||
881 | } | ||
882 | |||
883 | static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
884 | struct gk20a_fixed_alloc *f) | ||
885 | { | ||
886 | struct rb_node **new = &(a->fixed_allocs.rb_node); | ||
887 | struct rb_node *parent = NULL; | ||
888 | |||
889 | while (*new) { | ||
890 | struct gk20a_fixed_alloc *falloc = | ||
891 | container_of(*new, struct gk20a_fixed_alloc, | ||
892 | alloced_entry); | ||
893 | |||
894 | parent = *new; | ||
895 | if (f->start < falloc->start) | ||
896 | new = &((*new)->rb_left); | ||
897 | else if (f->start > falloc->start) | ||
898 | new = &((*new)->rb_right); | ||
899 | else | ||
900 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
901 | } | ||
902 | |||
903 | rb_link_node(&f->alloced_entry, parent, new); | ||
904 | rb_insert_color(&f->alloced_entry, &a->fixed_allocs); | ||
905 | } | ||
906 | |||
907 | /* | ||
908 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
909 | * deallocated buddy for further processing. | ||
910 | * | ||
911 | * @a must be locked. | ||
912 | */ | ||
913 | static struct gk20a_fixed_alloc *balloc_free_fixed( | ||
914 | struct gk20a_buddy_allocator *a, u64 addr) | ||
915 | { | ||
916 | struct rb_node *node = a->fixed_allocs.rb_node; | ||
917 | struct gk20a_fixed_alloc *falloc; | ||
918 | |||
919 | while (node) { | ||
920 | falloc = container_of(node, | ||
921 | struct gk20a_fixed_alloc, alloced_entry); | ||
922 | |||
923 | if (addr < falloc->start) | ||
924 | node = node->rb_left; | ||
925 | else if (addr > falloc->start) | ||
926 | node = node->rb_right; | ||
927 | else | ||
928 | break; | ||
929 | } | ||
930 | |||
931 | if (!node) | ||
932 | return NULL; | ||
933 | |||
934 | rb_erase(node, &a->fixed_allocs); | ||
935 | |||
936 | return falloc; | ||
937 | } | ||
938 | |||
939 | /* | ||
940 | * Find the parent range - doesn't necessarily need the parent to actually exist | ||
941 | * as a buddy. Finding an existing parent comes later... | ||
942 | */ | ||
943 | static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, | ||
944 | u64 base, u64 order, | ||
945 | u64 *pbase, u64 *porder) | ||
946 | { | ||
947 | u64 base_mask; | ||
948 | u64 shifted_base = balloc_base_shift(a, base); | ||
949 | |||
950 | order++; | ||
951 | base_mask = ~((a->blk_size << order) - 1); | ||
952 | |||
953 | shifted_base &= base_mask; | ||
954 | |||
955 | *pbase = balloc_base_unshift(a, shifted_base); | ||
956 | *porder = order; | ||
957 | } | ||
958 | |||
959 | /* | ||
960 | * Makes a buddy at the passed address. This will make all parent buddies | ||
961 | * necessary for this buddy to exist as well. | ||
962 | */ | ||
963 | static struct gk20a_buddy *__balloc_make_fixed_buddy( | ||
964 | struct gk20a_buddy_allocator *a, u64 base, u64 order) | ||
965 | { | ||
966 | struct gk20a_buddy *bud = NULL; | ||
967 | struct list_head *order_list; | ||
968 | u64 cur_order = order, cur_base = base; | ||
969 | |||
970 | /* | ||
971 | * Algo: | ||
972 | * 1. Keep jumping up a buddy order until we find the real buddy that | ||
973 | * this buddy exists in. | ||
974 | * 2. Then work our way down through the buddy tree until we hit a dead | ||
975 | * end. | ||
976 | * 3. Start splitting buddies until we split to the one we need to | ||
977 | * make. | ||
978 | */ | ||
979 | while (cur_order <= a->max_order) { | ||
980 | int found = 0; | ||
981 | |||
982 | order_list = balloc_get_order_list(a, cur_order); | ||
983 | list_for_each_entry(bud, order_list, buddy_entry) { | ||
984 | if (bud->start == cur_base) { | ||
985 | found = 1; | ||
986 | break; | ||
987 | } | ||
988 | } | ||
989 | |||
990 | if (found) | ||
991 | break; | ||
992 | |||
993 | __balloc_get_parent_range(a, cur_base, cur_order, | ||
994 | &cur_base, &cur_order); | ||
995 | } | ||
996 | |||
997 | if (cur_order > a->max_order) { | ||
998 | alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); | ||
999 | return NULL; | ||
1000 | } | ||
1001 | |||
1002 | /* Split this buddy as necessary until we get the target buddy. */ | ||
1003 | while (bud->start != base || bud->order != order) { | ||
1004 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | ||
1005 | balloc_coalesce(a, bud); | ||
1006 | return NULL; | ||
1007 | } | ||
1008 | |||
1009 | if (base < bud->right->start) | ||
1010 | bud = bud->left; | ||
1011 | else | ||
1012 | bud = bud->right; | ||
1013 | |||
1014 | } | ||
1015 | |||
1016 | return bud; | ||
1017 | } | ||
1018 | |||
1019 | static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
1020 | struct gk20a_fixed_alloc *falloc, | ||
1021 | u64 base, u64 len) | ||
1022 | { | ||
1023 | u64 shifted_base, inc_base; | ||
1024 | u64 align_order; | ||
1025 | |||
1026 | shifted_base = balloc_base_shift(a, base); | ||
1027 | if (shifted_base == 0) | ||
1028 | align_order = __fls(len >> a->blk_shift); | ||
1029 | else | ||
1030 | align_order = min_t(u64, | ||
1031 | __ffs(shifted_base >> a->blk_shift), | ||
1032 | __fls(len >> a->blk_shift)); | ||
1033 | |||
1034 | if (align_order > a->max_order) { | ||
1035 | alloc_dbg(balloc_owner(a), | ||
1036 | "Align order too big: %llu > %llu\n", | ||
1037 | align_order, a->max_order); | ||
1038 | return 0; | ||
1039 | } | ||
1040 | |||
1041 | /* | ||
1042 | * Generate a list of buddies that satisfy this allocation. | ||
1043 | */ | ||
1044 | inc_base = shifted_base; | ||
1045 | while (inc_base < (shifted_base + len)) { | ||
1046 | u64 order_len = balloc_order_to_len(a, align_order); | ||
1047 | u64 remaining; | ||
1048 | struct gk20a_buddy *bud; | ||
1049 | |||
1050 | bud = __balloc_make_fixed_buddy(a, | ||
1051 | balloc_base_unshift(a, inc_base), | ||
1052 | align_order); | ||
1053 | if (!bud) { | ||
1054 | alloc_dbg(balloc_owner(a), | ||
1055 | "Fixed buddy failed: {0x%llx, %llu}!\n", | ||
1056 | balloc_base_unshift(a, inc_base), | ||
1057 | align_order); | ||
1058 | goto err_and_cleanup; | ||
1059 | } | ||
1060 | |||
1061 | balloc_blist_rem(a, bud); | ||
1062 | balloc_alloc_buddy(a, bud); | ||
1063 | __balloc_buddy_list_add(a, bud, &falloc->buddies); | ||
1064 | |||
1065 | /* Book keeping. */ | ||
1066 | inc_base += order_len; | ||
1067 | remaining = (shifted_base + len) - inc_base; | ||
1068 | align_order = __ffs(inc_base >> a->blk_shift); | ||
1069 | |||
1070 | /* If we don't have much left - trim down align_order. */ | ||
1071 | if (balloc_order_to_len(a, align_order) > remaining) | ||
1072 | align_order = __balloc_max_order_in(a, inc_base, | ||
1073 | inc_base + remaining); | ||
1074 | } | ||
1075 | |||
1076 | return base; | ||
1077 | |||
1078 | err_and_cleanup: | ||
1079 | while (!list_empty(&falloc->buddies)) { | ||
1080 | struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, | ||
1081 | struct gk20a_buddy, | ||
1082 | buddy_entry); | ||
1083 | |||
1084 | __balloc_buddy_list_rem(a, bud); | ||
1085 | balloc_free_buddy(a, bud->start); | ||
1086 | kmem_cache_free(buddy_cache, bud); | ||
1087 | } | ||
1088 | |||
1089 | return 0; | ||
1090 | } | ||
1091 | |||
1092 | /* | ||
1093 | * Allocate a fixed address allocation. The address of the allocation is @base | ||
1094 | * and the length is @len. This is not a typical buddy allocator operation and | ||
1095 | * as such has a high posibility of failure if the address space is heavily in | ||
1096 | * use. | ||
1097 | * | ||
1098 | * Please do not use this function unless _absolutely_ necessary. | ||
1099 | */ | ||
1100 | static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, | ||
1101 | u64 base, u64 len) | ||
1102 | { | ||
1103 | u64 ret, real_bytes = 0; | ||
1104 | struct gk20a_buddy *bud; | ||
1105 | struct gk20a_fixed_alloc *falloc = NULL; | ||
1106 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1107 | |||
1108 | gk20a_alloc_trace_func(); | ||
1109 | |||
1110 | /* If base isn't aligned to an order 0 block, fail. */ | ||
1111 | if (base & (a->blk_size - 1)) | ||
1112 | goto fail; | ||
1113 | |||
1114 | if (len == 0) | ||
1115 | goto fail; | ||
1116 | |||
1117 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | ||
1118 | if (!falloc) | ||
1119 | goto fail; | ||
1120 | |||
1121 | INIT_LIST_HEAD(&falloc->buddies); | ||
1122 | falloc->start = base; | ||
1123 | falloc->end = base + len; | ||
1124 | |||
1125 | alloc_lock(__a); | ||
1126 | if (!balloc_is_range_free(a, base, base + len)) { | ||
1127 | alloc_dbg(balloc_owner(a), | ||
1128 | "Range not free: 0x%llx -> 0x%llx\n", | ||
1129 | base, base + len); | ||
1130 | goto fail_unlock; | ||
1131 | } | ||
1132 | |||
1133 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | ||
1134 | if (!ret) { | ||
1135 | alloc_dbg(balloc_owner(a), | ||
1136 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | ||
1137 | base, base + len); | ||
1138 | goto fail_unlock; | ||
1139 | } | ||
1140 | |||
1141 | balloc_alloc_fixed(a, falloc); | ||
1142 | |||
1143 | list_for_each_entry(bud, &falloc->buddies, buddy_entry) | ||
1144 | real_bytes += (bud->end - bud->start); | ||
1145 | |||
1146 | a->bytes_alloced += len; | ||
1147 | a->bytes_alloced_real += real_bytes; | ||
1148 | |||
1149 | alloc_unlock(__a); | ||
1150 | alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); | ||
1151 | |||
1152 | gk20a_alloc_trace_func_done(); | ||
1153 | return base; | ||
1154 | |||
1155 | fail_unlock: | ||
1156 | alloc_unlock(__a); | ||
1157 | fail: | ||
1158 | kfree(falloc); | ||
1159 | gk20a_alloc_trace_func_done(); | ||
1160 | return 0; | ||
1161 | } | ||
1162 | |||
1163 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
1164 | struct gk20a_fixed_alloc *falloc) | ||
1165 | { | ||
1166 | struct gk20a_buddy *bud; | ||
1167 | |||
1168 | while (!list_empty(&falloc->buddies)) { | ||
1169 | bud = list_first_entry(&falloc->buddies, | ||
1170 | struct gk20a_buddy, | ||
1171 | buddy_entry); | ||
1172 | __balloc_buddy_list_rem(a, bud); | ||
1173 | |||
1174 | balloc_free_buddy(a, bud->start); | ||
1175 | balloc_blist_add(a, bud); | ||
1176 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
1177 | |||
1178 | /* | ||
1179 | * Attemp to defrag the allocation. | ||
1180 | */ | ||
1181 | balloc_coalesce(a, bud); | ||
1182 | } | ||
1183 | |||
1184 | kfree(falloc); | ||
1185 | } | ||
1186 | |||
1187 | /* | ||
1188 | * Free the passed allocation. | ||
1189 | */ | ||
1190 | static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr) | ||
1191 | { | ||
1192 | struct gk20a_buddy *bud; | ||
1193 | struct gk20a_fixed_alloc *falloc; | ||
1194 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1195 | |||
1196 | gk20a_alloc_trace_func(); | ||
1197 | |||
1198 | if (!addr) { | ||
1199 | gk20a_alloc_trace_func_done(); | ||
1200 | return; | ||
1201 | } | ||
1202 | |||
1203 | alloc_lock(__a); | ||
1204 | |||
1205 | /* | ||
1206 | * First see if this is a fixed alloc. If not fall back to a regular | ||
1207 | * buddy. | ||
1208 | */ | ||
1209 | falloc = balloc_free_fixed(a, addr); | ||
1210 | if (falloc) { | ||
1211 | __balloc_do_free_fixed(a, falloc); | ||
1212 | goto done; | ||
1213 | } | ||
1214 | |||
1215 | bud = balloc_free_buddy(a, addr); | ||
1216 | if (!bud) | ||
1217 | goto done; | ||
1218 | |||
1219 | balloc_blist_add(a, bud); | ||
1220 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
1221 | |||
1222 | /* | ||
1223 | * Attemp to defrag the allocation. | ||
1224 | */ | ||
1225 | balloc_coalesce(a, bud); | ||
1226 | |||
1227 | done: | ||
1228 | alloc_unlock(__a); | ||
1229 | alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); | ||
1230 | gk20a_alloc_trace_func_done(); | ||
1231 | return; | ||
1232 | } | ||
1233 | |||
1234 | /* | ||
1235 | * Print the buddy allocator top level stats. If you pass @s as NULL then the | ||
1236 | * stats are printed to the kernel log. This lets this code be used for | ||
1237 | * debugging purposes internal to the allocator. | ||
1238 | */ | ||
1239 | static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, | ||
1240 | struct seq_file *s, int lock) | ||
1241 | { | ||
1242 | int i; | ||
1243 | struct rb_node *node; | ||
1244 | struct gk20a_fixed_alloc *falloc; | ||
1245 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1246 | |||
1247 | __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", | ||
1248 | a->base, a->length, a->blk_size); | ||
1249 | __alloc_pstat(s, __a, "Internal params:\n"); | ||
1250 | __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); | ||
1251 | __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); | ||
1252 | __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); | ||
1253 | __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); | ||
1254 | __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); | ||
1255 | |||
1256 | __alloc_pstat(s, __a, "Buddy blocks:\n"); | ||
1257 | __alloc_pstat(s, __a, " Order Free Alloced Split\n"); | ||
1258 | __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); | ||
1259 | |||
1260 | if (lock) | ||
1261 | alloc_lock(__a); | ||
1262 | for (i = a->max_order; i >= 0; i--) { | ||
1263 | if (a->buddy_list_len[i] == 0 && | ||
1264 | a->buddy_list_alloced[i] == 0 && | ||
1265 | a->buddy_list_split[i] == 0) | ||
1266 | continue; | ||
1267 | |||
1268 | __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, | ||
1269 | a->buddy_list_len[i], | ||
1270 | a->buddy_list_alloced[i], | ||
1271 | a->buddy_list_split[i]); | ||
1272 | } | ||
1273 | |||
1274 | __alloc_pstat(s, __a, "\n"); | ||
1275 | |||
1276 | for (node = rb_first(&a->fixed_allocs), i = 1; | ||
1277 | node != NULL; | ||
1278 | node = rb_next(node)) { | ||
1279 | falloc = container_of(node, | ||
1280 | struct gk20a_fixed_alloc, alloced_entry); | ||
1281 | |||
1282 | __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", | ||
1283 | i, falloc->start, falloc->end); | ||
1284 | } | ||
1285 | |||
1286 | __alloc_pstat(s, __a, "\n"); | ||
1287 | __alloc_pstat(s, __a, "Bytes allocated: %llu\n", | ||
1288 | a->bytes_alloced); | ||
1289 | __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", | ||
1290 | a->bytes_alloced_real); | ||
1291 | __alloc_pstat(s, __a, "Bytes freed: %llu\n", | ||
1292 | a->bytes_freed); | ||
1293 | |||
1294 | if (lock) | ||
1295 | alloc_unlock(__a); | ||
1296 | } | ||
1297 | |||
1298 | void gk20a_alloc_print_stats(struct gk20a_allocator *__a, | 105 | void gk20a_alloc_print_stats(struct gk20a_allocator *__a, |
1299 | struct seq_file *s, int lock) | 106 | struct seq_file *s, int lock) |
1300 | { | 107 | { |
@@ -1322,7 +129,7 @@ static const struct file_operations __alloc_fops = { | |||
1322 | .release = single_release, | 129 | .release = single_release, |
1323 | }; | 130 | }; |
1324 | 131 | ||
1325 | static void gk20a_init_alloc_debug(struct gk20a_allocator *a) | 132 | void gk20a_init_alloc_debug(struct gk20a_allocator *a) |
1326 | { | 133 | { |
1327 | if (!gk20a_alloc_debugfs_root) | 134 | if (!gk20a_alloc_debugfs_root) |
1328 | return; | 135 | return; |
@@ -1332,7 +139,15 @@ static void gk20a_init_alloc_debug(struct gk20a_allocator *a) | |||
1332 | a, &__alloc_fops); | 139 | a, &__alloc_fops); |
1333 | } | 140 | } |
1334 | 141 | ||
1335 | #ifdef CONFIG_DEBUG_FS | 142 | void gk20a_fini_alloc_debug(struct gk20a_allocator *a) |
143 | { | ||
144 | if (!gk20a_alloc_debugfs_root) | ||
145 | return; | ||
146 | |||
147 | if (!IS_ERR_OR_NULL(a->debugfs_entry)) | ||
148 | debugfs_remove(a->debugfs_entry); | ||
149 | } | ||
150 | |||
1336 | void gk20a_alloc_debugfs_init(struct platform_device *pdev) | 151 | void gk20a_alloc_debugfs_init(struct platform_device *pdev) |
1337 | { | 152 | { |
1338 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | 153 | struct gk20a_platform *platform = platform_get_drvdata(pdev); |
@@ -1345,4 +160,3 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev) | |||
1345 | debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, | 160 | debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, |
1346 | &gk20a_alloc_tracing_on); | 161 | &gk20a_alloc_tracing_on); |
1347 | } | 162 | } |
1348 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 74e23e6c..06819700 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | |||
@@ -17,8 +17,6 @@ | |||
17 | #ifndef GK20A_ALLOCATOR_H | 17 | #ifndef GK20A_ALLOCATOR_H |
18 | #define GK20A_ALLOCATOR_H | 18 | #define GK20A_ALLOCATOR_H |
19 | 19 | ||
20 | #include <linux/list.h> | ||
21 | #include <linux/rbtree.h> | ||
22 | #include <linux/debugfs.h> | 20 | #include <linux/debugfs.h> |
23 | #include <linux/seq_file.h> | 21 | #include <linux/seq_file.h> |
24 | #include <linux/platform_device.h> | 22 | #include <linux/platform_device.h> |
@@ -63,138 +61,6 @@ struct gk20a_allocator_ops { | |||
63 | struct seq_file *s, int lock); | 61 | struct seq_file *s, int lock); |
64 | }; | 62 | }; |
65 | 63 | ||
66 | /* | ||
67 | * Each buddy is an element in a binary tree. | ||
68 | */ | ||
69 | struct gk20a_buddy { | ||
70 | struct gk20a_buddy *parent; /* Parent node. */ | ||
71 | struct gk20a_buddy *buddy; /* This node's buddy. */ | ||
72 | struct gk20a_buddy *left; /* Lower address sub-node. */ | ||
73 | struct gk20a_buddy *right; /* Higher address sub-node. */ | ||
74 | |||
75 | struct list_head buddy_entry; /* List entry for various lists. */ | ||
76 | struct rb_node alloced_entry; /* RB tree of allocations. */ | ||
77 | |||
78 | u64 start; /* Start address of this buddy. */ | ||
79 | u64 end; /* End address of this buddy. */ | ||
80 | u64 order; /* Buddy order. */ | ||
81 | |||
82 | #define BALLOC_BUDDY_ALLOCED 0x1 | ||
83 | #define BALLOC_BUDDY_SPLIT 0x2 | ||
84 | #define BALLOC_BUDDY_IN_LIST 0x4 | ||
85 | int flags; /* List of associated flags. */ | ||
86 | |||
87 | /* | ||
88 | * Size of the PDE this buddy is using. This allows for grouping like | ||
89 | * sized allocations into the same PDE. | ||
90 | */ | ||
91 | #define BALLOC_PTE_SIZE_ANY 0x0 | ||
92 | #define BALLOC_PTE_SIZE_SMALL 0x1 | ||
93 | #define BALLOC_PTE_SIZE_BIG 0x2 | ||
94 | int pte_size; | ||
95 | }; | ||
96 | |||
97 | #define __buddy_flag_ops(flag, flag_up) \ | ||
98 | static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ | ||
99 | { \ | ||
100 | return b->flags & BALLOC_BUDDY_ ## flag_up; \ | ||
101 | } \ | ||
102 | static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ | ||
103 | { \ | ||
104 | b->flags |= BALLOC_BUDDY_ ## flag_up; \ | ||
105 | } \ | ||
106 | static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ | ||
107 | { \ | ||
108 | b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * int buddy_is_alloced(struct gk20a_buddy *b); | ||
113 | * void buddy_set_alloced(struct gk20a_buddy *b); | ||
114 | * void buddy_clr_alloced(struct gk20a_buddy *b); | ||
115 | * | ||
116 | * int buddy_is_split(struct gk20a_buddy *b); | ||
117 | * void buddy_set_split(struct gk20a_buddy *b); | ||
118 | * void buddy_clr_split(struct gk20a_buddy *b); | ||
119 | * | ||
120 | * int buddy_is_in_list(struct gk20a_buddy *b); | ||
121 | * void buddy_set_in_list(struct gk20a_buddy *b); | ||
122 | * void buddy_clr_in_list(struct gk20a_buddy *b); | ||
123 | */ | ||
124 | __buddy_flag_ops(alloced, ALLOCED); | ||
125 | __buddy_flag_ops(split, SPLIT); | ||
126 | __buddy_flag_ops(in_list, IN_LIST); | ||
127 | |||
128 | /* | ||
129 | * Keeps info for a fixed allocation. | ||
130 | */ | ||
131 | struct gk20a_fixed_alloc { | ||
132 | struct list_head buddies; /* List of buddies. */ | ||
133 | struct rb_node alloced_entry; /* RB tree of fixed allocations. */ | ||
134 | |||
135 | u64 start; /* Start of fixed block. */ | ||
136 | u64 end; /* End address. */ | ||
137 | }; | ||
138 | |||
139 | /* | ||
140 | * GPU buddy allocator for the various GPU address spaces. Each addressable unit | ||
141 | * doesn't have to correspond to a byte. In some cases each unit is a more | ||
142 | * complex object such as a comp_tag line or the like. | ||
143 | * | ||
144 | * The max order is computed based on the size of the minimum order and the size | ||
145 | * of the address space. | ||
146 | * | ||
147 | * order_size is the size of an order 0 buddy. | ||
148 | */ | ||
149 | struct gk20a_buddy_allocator { | ||
150 | struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ | ||
151 | struct vm_gk20a *vm; /* Parent VM - can be NULL. */ | ||
152 | |||
153 | u64 base; /* Base address of the space. */ | ||
154 | u64 length; /* Length of the space. */ | ||
155 | u64 blk_size; /* Size of order 0 allocation. */ | ||
156 | u64 blk_shift; /* Shift to divide by blk_size. */ | ||
157 | |||
158 | int init; /* Non-zero if initialized. */ | ||
159 | |||
160 | /* Internal stuff. */ | ||
161 | u64 start; /* Real start (aligned to blk_size). */ | ||
162 | u64 end; /* Real end, trimmed if needed. */ | ||
163 | u64 count; /* Count of objects in space. */ | ||
164 | u64 blks; /* Count of blks in the space. */ | ||
165 | u64 max_order; /* Specific maximum order. */ | ||
166 | |||
167 | struct rb_root alloced_buddies; /* Outstanding allocations. */ | ||
168 | struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ | ||
169 | |||
170 | /* | ||
171 | * Impose an upper bound on the maximum order. | ||
172 | */ | ||
173 | #define GPU_BALLOC_MAX_ORDER 31 | ||
174 | #define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) | ||
175 | |||
176 | struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; | ||
177 | u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; | ||
178 | u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; | ||
179 | u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; | ||
180 | |||
181 | /* | ||
182 | * This is for when the allocator is managing a GVA space (the | ||
183 | * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires | ||
184 | * that we group like sized allocations into PDE blocks. | ||
185 | */ | ||
186 | u64 pte_blk_order; | ||
187 | |||
188 | int inited; | ||
189 | |||
190 | #define GPU_BALLOC_GVA_SPACE 0x1 | ||
191 | u64 flags; | ||
192 | |||
193 | u64 bytes_alloced; | ||
194 | u64 bytes_alloced_real; | ||
195 | u64 bytes_freed; | ||
196 | }; | ||
197 | |||
198 | struct gk20a_allocator { | 64 | struct gk20a_allocator { |
199 | char name[32]; | 65 | char name[32]; |
200 | struct mutex lock; | 66 | struct mutex lock; |
@@ -205,6 +71,11 @@ struct gk20a_allocator { | |||
205 | struct dentry *debugfs_entry; | 71 | struct dentry *debugfs_entry; |
206 | }; | 72 | }; |
207 | 73 | ||
74 | /* | ||
75 | * Allocator flags. | ||
76 | */ | ||
77 | #define GPU_BALLOC_GVA_SPACE 0x1 | ||
78 | |||
208 | static inline void alloc_lock(struct gk20a_allocator *a) | 79 | static inline void alloc_lock(struct gk20a_allocator *a) |
209 | { | 80 | { |
210 | mutex_lock(&a->lock); | 81 | mutex_lock(&a->lock); |
@@ -215,42 +86,6 @@ static inline void alloc_unlock(struct gk20a_allocator *a) | |||
215 | mutex_unlock(&a->lock); | 86 | mutex_unlock(&a->lock); |
216 | } | 87 | } |
217 | 88 | ||
218 | static inline struct gk20a_buddy_allocator *buddy_allocator( | ||
219 | struct gk20a_allocator *a) | ||
220 | { | ||
221 | return (struct gk20a_buddy_allocator *)a->priv; | ||
222 | } | ||
223 | |||
224 | static inline struct list_head *balloc_get_order_list( | ||
225 | struct gk20a_buddy_allocator *a, int order) | ||
226 | { | ||
227 | return &a->buddy_list[order]; | ||
228 | } | ||
229 | |||
230 | static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, | ||
231 | int order) | ||
232 | { | ||
233 | return (1 << order) * a->blk_size; | ||
234 | } | ||
235 | |||
236 | static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, | ||
237 | u64 base) | ||
238 | { | ||
239 | return base - a->start; | ||
240 | } | ||
241 | |||
242 | static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, | ||
243 | u64 base) | ||
244 | { | ||
245 | return base + a->start; | ||
246 | } | ||
247 | |||
248 | static inline struct gk20a_allocator *balloc_owner( | ||
249 | struct gk20a_buddy_allocator *a) | ||
250 | { | ||
251 | return a->owner; | ||
252 | } | ||
253 | |||
254 | /* | 89 | /* |
255 | * Buddy allocator specific initializers. | 90 | * Buddy allocator specific initializers. |
256 | */ | 91 | */ |
@@ -262,6 +97,8 @@ int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator, | |||
262 | const char *name, u64 base, u64 size, | 97 | const char *name, u64 base, u64 size, |
263 | u64 blk_size, u64 flags); | 98 | u64 blk_size, u64 flags); |
264 | 99 | ||
100 | #define GPU_BALLOC_MAX_ORDER 31 | ||
101 | |||
265 | /* | 102 | /* |
266 | * Allocator APIs. | 103 | * Allocator APIs. |
267 | */ | 104 | */ |
@@ -282,10 +119,33 @@ void gk20a_alloc_print_stats(struct gk20a_allocator *a, | |||
282 | struct seq_file *s, int lock); | 119 | struct seq_file *s, int lock); |
283 | 120 | ||
284 | /* | 121 | /* |
122 | * Common functionality for the internals of the allocators. | ||
123 | */ | ||
124 | void gk20a_init_alloc_debug(struct gk20a_allocator *a); | ||
125 | void gk20a_fini_alloc_debug(struct gk20a_allocator *a); | ||
126 | int __gk20a_alloc_common_init(struct gk20a_allocator *a, | ||
127 | const char *name, void *priv, | ||
128 | const struct gk20a_allocator_ops *ops); | ||
129 | |||
130 | /* | ||
285 | * Debug stuff. | 131 | * Debug stuff. |
286 | */ | 132 | */ |
133 | extern u32 gk20a_alloc_tracing_on; | ||
134 | |||
287 | void gk20a_alloc_debugfs_init(struct platform_device *pdev); | 135 | void gk20a_alloc_debugfs_init(struct platform_device *pdev); |
288 | 136 | ||
137 | #define gk20a_alloc_trace_func() \ | ||
138 | do { \ | ||
139 | if (gk20a_alloc_tracing_on) \ | ||
140 | trace_printk("%s\n", __func__); \ | ||
141 | } while (0) | ||
142 | |||
143 | #define gk20a_alloc_trace_func_done() \ | ||
144 | do { \ | ||
145 | if (gk20a_alloc_tracing_on) \ | ||
146 | trace_printk("%s_done\n", __func__); \ | ||
147 | } while (0) | ||
148 | |||
289 | #define __alloc_pstat(seq, allocator, fmt, arg...) \ | 149 | #define __alloc_pstat(seq, allocator, fmt, arg...) \ |
290 | do { \ | 150 | do { \ |
291 | if (s) \ | 151 | if (s) \ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c new file mode 100644 index 00000000..4a1df339 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c | |||
@@ -0,0 +1,1187 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | |||
20 | #include "mm_gk20a.h" | ||
21 | #include "platform_gk20a.h" | ||
22 | #include "gk20a_allocator.h" | ||
23 | #include "buddy_allocator_priv.h" | ||
24 | |||
25 | static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ | ||
26 | |||
27 | /* Some other buddy allocator functions. */ | ||
28 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
29 | u64 addr); | ||
30 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
31 | struct gk20a_buddy *b); | ||
32 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
33 | struct gk20a_fixed_alloc *falloc); | ||
34 | |||
35 | /* | ||
36 | * This function is not present in older kernel's list.h code. | ||
37 | */ | ||
38 | #ifndef list_last_entry | ||
39 | #define list_last_entry(ptr, type, member) \ | ||
40 | list_entry((ptr)->prev, type, member) | ||
41 | #endif | ||
42 | |||
43 | /* | ||
44 | * GPU buddy allocator for various address spaces. | ||
45 | * | ||
46 | * Current limitations: | ||
47 | * o A fixed allocation could potentially be made that borders PDEs with | ||
48 | * different PTE sizes. This would require that fixed buffer to have | ||
49 | * different sized PTEs for different parts of the allocation. Probably | ||
50 | * best to just require PDE alignment for fixed address allocs. | ||
51 | * | ||
52 | * o It is currently possible to make an allocator that has a buddy alignment | ||
53 | * out of sync with the PDE block size alignment. A simple example is a | ||
54 | * 32GB address space starting at byte 1. Every buddy is shifted off by 1 | ||
55 | * which means each buddy corresponf to more than one actual GPU page. The | ||
56 | * best way to fix this is probably just require PDE blocksize alignment | ||
57 | * for the start of the address space. At the moment all allocators are | ||
58 | * easily PDE aligned so this hasn't been a problem. | ||
59 | */ | ||
60 | |||
61 | /* | ||
62 | * Pick a suitable maximum order for this allocator. | ||
63 | * | ||
64 | * Hueristic: Just guessing that the best max order is the largest single | ||
65 | * block that will fit in the address space. | ||
66 | */ | ||
67 | static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) | ||
68 | { | ||
69 | u64 true_max_order = ilog2(a->blks); | ||
70 | |||
71 | if (a->max_order == 0) { | ||
72 | a->max_order = true_max_order; | ||
73 | return; | ||
74 | } | ||
75 | |||
76 | if (a->max_order > true_max_order) | ||
77 | a->max_order = true_max_order; | ||
78 | if (a->max_order > GPU_BALLOC_MAX_ORDER) | ||
79 | a->max_order = GPU_BALLOC_MAX_ORDER; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Since we can only allocate in chucks of a->blk_size we need to trim off | ||
84 | * any excess data that is not aligned to a->blk_size. | ||
85 | */ | ||
86 | static void balloc_allocator_align(struct gk20a_buddy_allocator *a) | ||
87 | { | ||
88 | a->start = ALIGN(a->base, a->blk_size); | ||
89 | WARN_ON(a->start != a->base); | ||
90 | a->end = (a->base + a->length) & ~(a->blk_size - 1); | ||
91 | a->count = a->end - a->start; | ||
92 | a->blks = a->count >> a->blk_shift; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Pass NULL for parent if you want a top level buddy. | ||
97 | */ | ||
98 | static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, | ||
99 | struct gk20a_buddy *parent, | ||
100 | u64 start, u64 order) | ||
101 | { | ||
102 | struct gk20a_buddy *new_buddy; | ||
103 | |||
104 | new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); | ||
105 | if (!new_buddy) | ||
106 | return NULL; | ||
107 | |||
108 | memset(new_buddy, 0, sizeof(struct gk20a_buddy)); | ||
109 | |||
110 | new_buddy->parent = parent; | ||
111 | new_buddy->start = start; | ||
112 | new_buddy->order = order; | ||
113 | new_buddy->end = start + (1 << order) * a->blk_size; | ||
114 | |||
115 | return new_buddy; | ||
116 | } | ||
117 | |||
118 | static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, | ||
119 | struct gk20a_buddy *b, | ||
120 | struct list_head *list) | ||
121 | { | ||
122 | if (buddy_is_in_list(b)) { | ||
123 | alloc_dbg(balloc_owner(a), | ||
124 | "Oops: adding added buddy (%llu:0x%llx)\n", | ||
125 | b->order, b->start); | ||
126 | BUG(); | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Add big PTE blocks to the tail, small to the head for GVA spaces. | ||
131 | * This lets the code that checks if there are available blocks check | ||
132 | * without cycling through the entire list. | ||
133 | */ | ||
134 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
135 | b->pte_size == BALLOC_PTE_SIZE_BIG) | ||
136 | list_add_tail(&b->buddy_entry, list); | ||
137 | else | ||
138 | list_add(&b->buddy_entry, list); | ||
139 | |||
140 | buddy_set_in_list(b); | ||
141 | } | ||
142 | |||
143 | static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, | ||
144 | struct gk20a_buddy *b) | ||
145 | { | ||
146 | if (!buddy_is_in_list(b)) { | ||
147 | alloc_dbg(balloc_owner(a), | ||
148 | "Oops: removing removed buddy (%llu:0x%llx)\n", | ||
149 | b->order, b->start); | ||
150 | BUG(); | ||
151 | } | ||
152 | |||
153 | list_del_init(&b->buddy_entry); | ||
154 | buddy_clr_in_list(b); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Add a buddy to one of the buddy lists and deal with the necessary | ||
159 | * book keeping. Adds the buddy to the list specified by the buddy's order. | ||
160 | */ | ||
161 | static void balloc_blist_add(struct gk20a_buddy_allocator *a, | ||
162 | struct gk20a_buddy *b) | ||
163 | { | ||
164 | __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); | ||
165 | a->buddy_list_len[b->order]++; | ||
166 | } | ||
167 | |||
168 | static void balloc_blist_rem(struct gk20a_buddy_allocator *a, | ||
169 | struct gk20a_buddy *b) | ||
170 | { | ||
171 | __balloc_buddy_list_rem(a, b); | ||
172 | a->buddy_list_len[b->order]--; | ||
173 | } | ||
174 | |||
175 | static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) | ||
176 | { | ||
177 | if (len == 0) | ||
178 | return 0; | ||
179 | |||
180 | len--; | ||
181 | len >>= a->blk_shift; | ||
182 | |||
183 | return fls(len); | ||
184 | } | ||
185 | |||
186 | static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, | ||
187 | u64 start, u64 end) | ||
188 | { | ||
189 | u64 size = (end - start) >> a->blk_shift; | ||
190 | |||
191 | if (size > 0) | ||
192 | return min_t(u64, ilog2(size), a->max_order); | ||
193 | else | ||
194 | return GPU_BALLOC_MAX_ORDER; | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Initialize the buddy lists. | ||
199 | */ | ||
200 | static int balloc_init_lists(struct gk20a_buddy_allocator *a) | ||
201 | { | ||
202 | int i; | ||
203 | u64 bstart, bend, order; | ||
204 | struct gk20a_buddy *buddy; | ||
205 | |||
206 | bstart = a->start; | ||
207 | bend = a->end; | ||
208 | |||
209 | /* First make sure the LLs are valid. */ | ||
210 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) | ||
211 | INIT_LIST_HEAD(balloc_get_order_list(a, i)); | ||
212 | |||
213 | while (bstart < bend) { | ||
214 | order = __balloc_max_order_in(a, bstart, bend); | ||
215 | |||
216 | buddy = balloc_new_buddy(a, NULL, bstart, order); | ||
217 | if (!buddy) | ||
218 | goto cleanup; | ||
219 | |||
220 | balloc_blist_add(a, buddy); | ||
221 | bstart += balloc_order_to_len(a, order); | ||
222 | } | ||
223 | |||
224 | return 0; | ||
225 | |||
226 | cleanup: | ||
227 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
228 | if (!list_empty(balloc_get_order_list(a, i))) { | ||
229 | buddy = list_first_entry(balloc_get_order_list(a, i), | ||
230 | struct gk20a_buddy, buddy_entry); | ||
231 | balloc_blist_rem(a, buddy); | ||
232 | kmem_cache_free(buddy_cache, buddy); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | return -ENOMEM; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Clean up and destroy the passed allocator. | ||
241 | */ | ||
242 | static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) | ||
243 | { | ||
244 | int i; | ||
245 | struct rb_node *node; | ||
246 | struct gk20a_buddy *bud; | ||
247 | struct gk20a_fixed_alloc *falloc; | ||
248 | struct gk20a_buddy_allocator *a = __a->priv; | ||
249 | |||
250 | alloc_lock(__a); | ||
251 | |||
252 | gk20a_fini_alloc_debug(__a); | ||
253 | |||
254 | /* | ||
255 | * Free the fixed allocs first. | ||
256 | */ | ||
257 | while ((node = rb_first(&a->fixed_allocs)) != NULL) { | ||
258 | falloc = container_of(node, | ||
259 | struct gk20a_fixed_alloc, alloced_entry); | ||
260 | |||
261 | rb_erase(node, &a->fixed_allocs); | ||
262 | __balloc_do_free_fixed(a, falloc); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * And now free all outstanding allocations. | ||
267 | */ | ||
268 | while ((node = rb_first(&a->alloced_buddies)) != NULL) { | ||
269 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
270 | balloc_free_buddy(a, bud->start); | ||
271 | balloc_blist_add(a, bud); | ||
272 | balloc_coalesce(a, bud); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Now clean up the unallocated buddies. | ||
277 | */ | ||
278 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
279 | BUG_ON(a->buddy_list_alloced[i] != 0); | ||
280 | |||
281 | while (!list_empty(balloc_get_order_list(a, i))) { | ||
282 | bud = list_first_entry(balloc_get_order_list(a, i), | ||
283 | struct gk20a_buddy, buddy_entry); | ||
284 | balloc_blist_rem(a, bud); | ||
285 | kmem_cache_free(buddy_cache, bud); | ||
286 | } | ||
287 | |||
288 | if (a->buddy_list_len[i] != 0) { | ||
289 | pr_info("Excess buddies!!! (%d: %llu)\n", | ||
290 | i, a->buddy_list_len[i]); | ||
291 | BUG(); | ||
292 | } | ||
293 | if (a->buddy_list_split[i] != 0) { | ||
294 | pr_info("Excess split nodes!!! (%d: %llu)\n", | ||
295 | i, a->buddy_list_split[i]); | ||
296 | BUG(); | ||
297 | } | ||
298 | if (a->buddy_list_alloced[i] != 0) { | ||
299 | pr_info("Excess alloced nodes!!! (%d: %llu)\n", | ||
300 | i, a->buddy_list_alloced[i]); | ||
301 | BUG(); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | kfree(a); | ||
306 | |||
307 | alloc_unlock(__a); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Combine the passed buddy if possible. The pointer in @b may not be valid | ||
312 | * after this as the buddy may be freed. | ||
313 | * | ||
314 | * @a must be locked. | ||
315 | */ | ||
316 | static void balloc_coalesce(struct gk20a_buddy_allocator *a, | ||
317 | struct gk20a_buddy *b) | ||
318 | { | ||
319 | struct gk20a_buddy *parent; | ||
320 | |||
321 | if (buddy_is_alloced(b) || buddy_is_split(b)) | ||
322 | return; | ||
323 | |||
324 | /* | ||
325 | * If both our buddy and I are both not allocated and not split then | ||
326 | * we can coalesce ourselves. | ||
327 | */ | ||
328 | if (!b->buddy) | ||
329 | return; | ||
330 | if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) | ||
331 | return; | ||
332 | |||
333 | parent = b->parent; | ||
334 | |||
335 | balloc_blist_rem(a, b); | ||
336 | balloc_blist_rem(a, b->buddy); | ||
337 | |||
338 | buddy_clr_split(parent); | ||
339 | a->buddy_list_split[parent->order]--; | ||
340 | balloc_blist_add(a, parent); | ||
341 | |||
342 | /* | ||
343 | * Recursively coalesce as far as we can go. | ||
344 | */ | ||
345 | balloc_coalesce(a, parent); | ||
346 | |||
347 | /* Clean up the remains. */ | ||
348 | kmem_cache_free(buddy_cache, b->buddy); | ||
349 | kmem_cache_free(buddy_cache, b); | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. | ||
354 | * | ||
355 | * @a must be locked. | ||
356 | */ | ||
357 | static int balloc_split_buddy(struct gk20a_buddy_allocator *a, | ||
358 | struct gk20a_buddy *b, int pte_size) | ||
359 | { | ||
360 | struct gk20a_buddy *left, *right; | ||
361 | u64 half; | ||
362 | |||
363 | left = balloc_new_buddy(a, b, b->start, b->order - 1); | ||
364 | if (!left) | ||
365 | return -ENOMEM; | ||
366 | |||
367 | half = (b->end - b->start) / 2; | ||
368 | |||
369 | right = balloc_new_buddy(a, b, b->start + half, b->order - 1); | ||
370 | if (!right) { | ||
371 | kmem_cache_free(buddy_cache, left); | ||
372 | return -ENOMEM; | ||
373 | } | ||
374 | |||
375 | buddy_set_split(b); | ||
376 | a->buddy_list_split[b->order]++; | ||
377 | |||
378 | b->left = left; | ||
379 | b->right = right; | ||
380 | left->buddy = right; | ||
381 | right->buddy = left; | ||
382 | left->parent = b; | ||
383 | right->parent = b; | ||
384 | |||
385 | /* PTE considerations. */ | ||
386 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
387 | left->order <= a->pte_blk_order) { | ||
388 | left->pte_size = pte_size; | ||
389 | right->pte_size = pte_size; | ||
390 | } | ||
391 | |||
392 | balloc_blist_rem(a, b); | ||
393 | balloc_blist_add(a, left); | ||
394 | balloc_blist_add(a, right); | ||
395 | |||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Place the passed buddy into the RB tree for allocated buddies. Never fails | ||
401 | * unless the passed entry is a duplicate which is a bug. | ||
402 | * | ||
403 | * @a must be locked. | ||
404 | */ | ||
405 | static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, | ||
406 | struct gk20a_buddy *b) | ||
407 | { | ||
408 | struct rb_node **new = &(a->alloced_buddies.rb_node); | ||
409 | struct rb_node *parent = NULL; | ||
410 | |||
411 | while (*new) { | ||
412 | struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, | ||
413 | alloced_entry); | ||
414 | |||
415 | parent = *new; | ||
416 | if (b->start < bud->start) | ||
417 | new = &((*new)->rb_left); | ||
418 | else if (b->start > bud->start) | ||
419 | new = &((*new)->rb_right); | ||
420 | else | ||
421 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
422 | } | ||
423 | |||
424 | rb_link_node(&b->alloced_entry, parent, new); | ||
425 | rb_insert_color(&b->alloced_entry, &a->alloced_buddies); | ||
426 | |||
427 | buddy_set_alloced(b); | ||
428 | a->buddy_list_alloced[b->order]++; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
433 | * deallocated buddy for further processing. | ||
434 | * | ||
435 | * @a must be locked. | ||
436 | */ | ||
437 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, | ||
438 | u64 addr) | ||
439 | { | ||
440 | struct rb_node *node = a->alloced_buddies.rb_node; | ||
441 | struct gk20a_buddy *bud; | ||
442 | |||
443 | while (node) { | ||
444 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
445 | |||
446 | if (addr < bud->start) | ||
447 | node = node->rb_left; | ||
448 | else if (addr > bud->start) | ||
449 | node = node->rb_right; | ||
450 | else | ||
451 | break; | ||
452 | } | ||
453 | |||
454 | if (!node) | ||
455 | return NULL; | ||
456 | |||
457 | rb_erase(node, &a->alloced_buddies); | ||
458 | buddy_clr_alloced(bud); | ||
459 | a->buddy_list_alloced[bud->order]--; | ||
460 | |||
461 | return bud; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * Find a suitable buddy for the given order and PTE type (big or little). | ||
466 | */ | ||
467 | static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, | ||
468 | u64 order, int pte_size) | ||
469 | { | ||
470 | struct gk20a_buddy *bud; | ||
471 | |||
472 | if (order > a->max_order || | ||
473 | list_empty(balloc_get_order_list(a, order))) | ||
474 | return NULL; | ||
475 | |||
476 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
477 | pte_size == BALLOC_PTE_SIZE_BIG) | ||
478 | bud = list_last_entry(balloc_get_order_list(a, order), | ||
479 | struct gk20a_buddy, buddy_entry); | ||
480 | else | ||
481 | bud = list_first_entry(balloc_get_order_list(a, order), | ||
482 | struct gk20a_buddy, buddy_entry); | ||
483 | |||
484 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
485 | bud->pte_size != pte_size) | ||
486 | return NULL; | ||
487 | |||
488 | return bud; | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * Allocate a suitably sized buddy. If no suitable buddy exists split higher | ||
493 | * order buddies until we have a suitable buddy to allocate. | ||
494 | * | ||
495 | * For PDE grouping add an extra check to see if a buddy is suitable: that the | ||
496 | * buddy exists in a PDE who's PTE size is reasonable | ||
497 | * | ||
498 | * @a must be locked. | ||
499 | */ | ||
500 | static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, | ||
501 | u64 order, int pte_size) | ||
502 | { | ||
503 | u64 split_order; | ||
504 | struct gk20a_buddy *bud = NULL; | ||
505 | |||
506 | split_order = order; | ||
507 | while (split_order <= a->max_order && | ||
508 | !(bud = __balloc_find_buddy(a, split_order, pte_size))) | ||
509 | split_order++; | ||
510 | |||
511 | /* Out of memory! */ | ||
512 | if (!bud) | ||
513 | return 0; | ||
514 | |||
515 | while (bud->order != order) { | ||
516 | if (balloc_split_buddy(a, bud, pte_size)) | ||
517 | return 0; /* No mem... */ | ||
518 | bud = bud->left; | ||
519 | } | ||
520 | |||
521 | balloc_blist_rem(a, bud); | ||
522 | balloc_alloc_buddy(a, bud); | ||
523 | |||
524 | return bud->start; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * See if the passed range is actually available for allocation. If so, then | ||
529 | * return 1, otherwise return 0. | ||
530 | * | ||
531 | * TODO: Right now this uses the unoptimal approach of going through all | ||
532 | * outstanding allocations and checking their base/ends. This could be better. | ||
533 | */ | ||
534 | static int balloc_is_range_free(struct gk20a_buddy_allocator *a, | ||
535 | u64 base, u64 end) | ||
536 | { | ||
537 | struct rb_node *node; | ||
538 | struct gk20a_buddy *bud; | ||
539 | |||
540 | node = rb_first(&a->alloced_buddies); | ||
541 | if (!node) | ||
542 | return 1; /* No allocs yet. */ | ||
543 | |||
544 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
545 | |||
546 | while (bud->start < end) { | ||
547 | if ((bud->start > base && bud->start < end) || | ||
548 | (bud->end > base && bud->end < end)) | ||
549 | return 0; | ||
550 | |||
551 | node = rb_next(node); | ||
552 | if (!node) | ||
553 | break; | ||
554 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
555 | } | ||
556 | |||
557 | return 1; | ||
558 | } | ||
559 | |||
560 | static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
561 | struct gk20a_fixed_alloc *f) | ||
562 | { | ||
563 | struct rb_node **new = &(a->fixed_allocs.rb_node); | ||
564 | struct rb_node *parent = NULL; | ||
565 | |||
566 | while (*new) { | ||
567 | struct gk20a_fixed_alloc *falloc = | ||
568 | container_of(*new, struct gk20a_fixed_alloc, | ||
569 | alloced_entry); | ||
570 | |||
571 | BUG_ON(!virt_addr_valid(falloc)); | ||
572 | |||
573 | parent = *new; | ||
574 | if (f->start < falloc->start) | ||
575 | new = &((*new)->rb_left); | ||
576 | else if (f->start > falloc->start) | ||
577 | new = &((*new)->rb_right); | ||
578 | else | ||
579 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
580 | } | ||
581 | |||
582 | rb_link_node(&f->alloced_entry, parent, new); | ||
583 | rb_insert_color(&f->alloced_entry, &a->fixed_allocs); | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
588 | * deallocated buddy for further processing. | ||
589 | * | ||
590 | * @a must be locked. | ||
591 | */ | ||
592 | static struct gk20a_fixed_alloc *balloc_free_fixed( | ||
593 | struct gk20a_buddy_allocator *a, u64 addr) | ||
594 | { | ||
595 | struct rb_node *node = a->fixed_allocs.rb_node; | ||
596 | struct gk20a_fixed_alloc *falloc; | ||
597 | |||
598 | while (node) { | ||
599 | falloc = container_of(node, | ||
600 | struct gk20a_fixed_alloc, alloced_entry); | ||
601 | |||
602 | if (addr < falloc->start) | ||
603 | node = node->rb_left; | ||
604 | else if (addr > falloc->start) | ||
605 | node = node->rb_right; | ||
606 | else | ||
607 | break; | ||
608 | } | ||
609 | |||
610 | if (!node) | ||
611 | return NULL; | ||
612 | |||
613 | rb_erase(node, &a->fixed_allocs); | ||
614 | |||
615 | return falloc; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * Find the parent range - doesn't necessarily need the parent to actually exist | ||
620 | * as a buddy. Finding an existing parent comes later... | ||
621 | */ | ||
622 | static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, | ||
623 | u64 base, u64 order, | ||
624 | u64 *pbase, u64 *porder) | ||
625 | { | ||
626 | u64 base_mask; | ||
627 | u64 shifted_base = balloc_base_shift(a, base); | ||
628 | |||
629 | order++; | ||
630 | base_mask = ~((a->blk_size << order) - 1); | ||
631 | |||
632 | shifted_base &= base_mask; | ||
633 | |||
634 | *pbase = balloc_base_unshift(a, shifted_base); | ||
635 | *porder = order; | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | * Makes a buddy at the passed address. This will make all parent buddies | ||
640 | * necessary for this buddy to exist as well. | ||
641 | */ | ||
642 | static struct gk20a_buddy *__balloc_make_fixed_buddy( | ||
643 | struct gk20a_buddy_allocator *a, u64 base, u64 order) | ||
644 | { | ||
645 | struct gk20a_buddy *bud = NULL; | ||
646 | struct list_head *order_list; | ||
647 | u64 cur_order = order, cur_base = base; | ||
648 | |||
649 | /* | ||
650 | * Algo: | ||
651 | * 1. Keep jumping up a buddy order until we find the real buddy that | ||
652 | * this buddy exists in. | ||
653 | * 2. Then work our way down through the buddy tree until we hit a dead | ||
654 | * end. | ||
655 | * 3. Start splitting buddies until we split to the one we need to | ||
656 | * make. | ||
657 | */ | ||
658 | while (cur_order <= a->max_order) { | ||
659 | int found = 0; | ||
660 | |||
661 | order_list = balloc_get_order_list(a, cur_order); | ||
662 | list_for_each_entry(bud, order_list, buddy_entry) { | ||
663 | if (bud->start == cur_base) { | ||
664 | found = 1; | ||
665 | break; | ||
666 | } | ||
667 | } | ||
668 | |||
669 | if (found) | ||
670 | break; | ||
671 | |||
672 | __balloc_get_parent_range(a, cur_base, cur_order, | ||
673 | &cur_base, &cur_order); | ||
674 | } | ||
675 | |||
676 | if (cur_order > a->max_order) { | ||
677 | alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); | ||
678 | return NULL; | ||
679 | } | ||
680 | |||
681 | /* Split this buddy as necessary until we get the target buddy. */ | ||
682 | while (bud->start != base || bud->order != order) { | ||
683 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | ||
684 | balloc_coalesce(a, bud); | ||
685 | return NULL; | ||
686 | } | ||
687 | |||
688 | if (base < bud->right->start) | ||
689 | bud = bud->left; | ||
690 | else | ||
691 | bud = bud->right; | ||
692 | |||
693 | } | ||
694 | |||
695 | return bud; | ||
696 | } | ||
697 | |||
698 | static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, | ||
699 | struct gk20a_fixed_alloc *falloc, | ||
700 | u64 base, u64 len) | ||
701 | { | ||
702 | u64 shifted_base, inc_base; | ||
703 | u64 align_order; | ||
704 | |||
705 | shifted_base = balloc_base_shift(a, base); | ||
706 | if (shifted_base == 0) | ||
707 | align_order = __fls(len >> a->blk_shift); | ||
708 | else | ||
709 | align_order = min_t(u64, | ||
710 | __ffs(shifted_base >> a->blk_shift), | ||
711 | __fls(len >> a->blk_shift)); | ||
712 | |||
713 | if (align_order > a->max_order) { | ||
714 | alloc_dbg(balloc_owner(a), | ||
715 | "Align order too big: %llu > %llu\n", | ||
716 | align_order, a->max_order); | ||
717 | return 0; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * Generate a list of buddies that satisfy this allocation. | ||
722 | */ | ||
723 | inc_base = shifted_base; | ||
724 | while (inc_base < (shifted_base + len)) { | ||
725 | u64 order_len = balloc_order_to_len(a, align_order); | ||
726 | u64 remaining; | ||
727 | struct gk20a_buddy *bud; | ||
728 | |||
729 | bud = __balloc_make_fixed_buddy(a, | ||
730 | balloc_base_unshift(a, inc_base), | ||
731 | align_order); | ||
732 | if (!bud) { | ||
733 | alloc_dbg(balloc_owner(a), | ||
734 | "Fixed buddy failed: {0x%llx, %llu}!\n", | ||
735 | balloc_base_unshift(a, inc_base), | ||
736 | align_order); | ||
737 | goto err_and_cleanup; | ||
738 | } | ||
739 | |||
740 | balloc_blist_rem(a, bud); | ||
741 | balloc_alloc_buddy(a, bud); | ||
742 | __balloc_buddy_list_add(a, bud, &falloc->buddies); | ||
743 | |||
744 | /* Book keeping. */ | ||
745 | inc_base += order_len; | ||
746 | remaining = (shifted_base + len) - inc_base; | ||
747 | align_order = __ffs(inc_base >> a->blk_shift); | ||
748 | |||
749 | /* If we don't have much left - trim down align_order. */ | ||
750 | if (balloc_order_to_len(a, align_order) > remaining) | ||
751 | align_order = __balloc_max_order_in(a, inc_base, | ||
752 | inc_base + remaining); | ||
753 | } | ||
754 | |||
755 | return base; | ||
756 | |||
757 | err_and_cleanup: | ||
758 | while (!list_empty(&falloc->buddies)) { | ||
759 | struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, | ||
760 | struct gk20a_buddy, | ||
761 | buddy_entry); | ||
762 | |||
763 | __balloc_buddy_list_rem(a, bud); | ||
764 | balloc_free_buddy(a, bud->start); | ||
765 | kmem_cache_free(buddy_cache, bud); | ||
766 | } | ||
767 | |||
768 | return 0; | ||
769 | } | ||
770 | |||
771 | static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, | ||
772 | struct gk20a_fixed_alloc *falloc) | ||
773 | { | ||
774 | struct gk20a_buddy *bud; | ||
775 | |||
776 | while (!list_empty(&falloc->buddies)) { | ||
777 | bud = list_first_entry(&falloc->buddies, | ||
778 | struct gk20a_buddy, | ||
779 | buddy_entry); | ||
780 | __balloc_buddy_list_rem(a, bud); | ||
781 | |||
782 | balloc_free_buddy(a, bud->start); | ||
783 | balloc_blist_add(a, bud); | ||
784 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
785 | |||
786 | /* | ||
787 | * Attemp to defrag the allocation. | ||
788 | */ | ||
789 | balloc_coalesce(a, bud); | ||
790 | } | ||
791 | |||
792 | kfree(falloc); | ||
793 | } | ||
794 | |||
795 | /* | ||
796 | * Allocate memory from the passed allocator. | ||
797 | */ | ||
798 | static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len) | ||
799 | { | ||
800 | u64 order, addr; | ||
801 | int pte_size; | ||
802 | struct gk20a_buddy_allocator *a = __a->priv; | ||
803 | |||
804 | gk20a_alloc_trace_func(); | ||
805 | |||
806 | alloc_lock(__a); | ||
807 | |||
808 | order = balloc_get_order(a, len); | ||
809 | |||
810 | if (order > a->max_order) { | ||
811 | alloc_unlock(__a); | ||
812 | alloc_dbg(balloc_owner(a), "Alloc fail\n"); | ||
813 | gk20a_alloc_trace_func_done(); | ||
814 | return 0; | ||
815 | } | ||
816 | |||
817 | /* | ||
818 | * For now pass the base address of the allocator's region to | ||
819 | * __get_pte_size(). This ensures we get the right page size for | ||
820 | * the alloc but we don't have to know what the real address is | ||
821 | * going to be quite yet. | ||
822 | * | ||
823 | * TODO: once userspace supports a unified address space pass 0 for | ||
824 | * the base. This will make only 'len' affect the PTE size. | ||
825 | */ | ||
826 | if (a->flags & GPU_BALLOC_GVA_SPACE) | ||
827 | pte_size = __get_pte_size(a->vm, a->base, len); | ||
828 | else | ||
829 | pte_size = BALLOC_PTE_SIZE_ANY; | ||
830 | |||
831 | addr = __balloc_do_alloc(a, order, pte_size); | ||
832 | |||
833 | if (addr) { | ||
834 | a->bytes_alloced += len; | ||
835 | a->bytes_alloced_real += balloc_order_to_len(a, order); | ||
836 | alloc_dbg(balloc_owner(a), | ||
837 | "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", | ||
838 | addr, order, len, | ||
839 | pte_size == gmmu_page_size_big ? "big" : | ||
840 | pte_size == gmmu_page_size_small ? "small" : | ||
841 | "NA/any"); | ||
842 | } else { | ||
843 | alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); | ||
844 | } | ||
845 | |||
846 | alloc_unlock(__a); | ||
847 | |||
848 | gk20a_alloc_trace_func_done(); | ||
849 | return addr; | ||
850 | } | ||
851 | |||
852 | /* | ||
853 | * Allocate a fixed address allocation. The address of the allocation is @base | ||
854 | * and the length is @len. This is not a typical buddy allocator operation and | ||
855 | * as such has a high posibility of failure if the address space is heavily in | ||
856 | * use. | ||
857 | * | ||
858 | * Please do not use this function unless _absolutely_ necessary. | ||
859 | */ | ||
860 | static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a, | ||
861 | u64 base, u64 len) | ||
862 | { | ||
863 | u64 ret, real_bytes = 0; | ||
864 | struct gk20a_buddy *bud; | ||
865 | struct gk20a_fixed_alloc *falloc = NULL; | ||
866 | struct gk20a_buddy_allocator *a = __a->priv; | ||
867 | |||
868 | gk20a_alloc_trace_func(); | ||
869 | |||
870 | /* If base isn't aligned to an order 0 block, fail. */ | ||
871 | if (base & (a->blk_size - 1)) | ||
872 | goto fail; | ||
873 | |||
874 | if (len == 0) | ||
875 | goto fail; | ||
876 | |||
877 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | ||
878 | if (!falloc) | ||
879 | goto fail; | ||
880 | |||
881 | INIT_LIST_HEAD(&falloc->buddies); | ||
882 | falloc->start = base; | ||
883 | falloc->end = base + len; | ||
884 | |||
885 | alloc_lock(__a); | ||
886 | if (!balloc_is_range_free(a, base, base + len)) { | ||
887 | alloc_dbg(balloc_owner(a), | ||
888 | "Range not free: 0x%llx -> 0x%llx\n", | ||
889 | base, base + len); | ||
890 | goto fail_unlock; | ||
891 | } | ||
892 | |||
893 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | ||
894 | if (!ret) { | ||
895 | alloc_dbg(balloc_owner(a), | ||
896 | "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | ||
897 | base, base + len); | ||
898 | goto fail_unlock; | ||
899 | } | ||
900 | |||
901 | balloc_alloc_fixed(a, falloc); | ||
902 | |||
903 | list_for_each_entry(bud, &falloc->buddies, buddy_entry) | ||
904 | real_bytes += (bud->end - bud->start); | ||
905 | |||
906 | a->bytes_alloced += len; | ||
907 | a->bytes_alloced_real += real_bytes; | ||
908 | |||
909 | alloc_unlock(__a); | ||
910 | alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); | ||
911 | |||
912 | gk20a_alloc_trace_func_done(); | ||
913 | return base; | ||
914 | |||
915 | fail_unlock: | ||
916 | alloc_unlock(__a); | ||
917 | fail: | ||
918 | kfree(falloc); | ||
919 | gk20a_alloc_trace_func_done(); | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | /* | ||
924 | * Free the passed allocation. | ||
925 | */ | ||
926 | static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr) | ||
927 | { | ||
928 | struct gk20a_buddy *bud; | ||
929 | struct gk20a_fixed_alloc *falloc; | ||
930 | struct gk20a_buddy_allocator *a = __a->priv; | ||
931 | |||
932 | gk20a_alloc_trace_func(); | ||
933 | |||
934 | if (!addr) { | ||
935 | gk20a_alloc_trace_func_done(); | ||
936 | return; | ||
937 | } | ||
938 | |||
939 | alloc_lock(__a); | ||
940 | |||
941 | /* | ||
942 | * First see if this is a fixed alloc. If not fall back to a regular | ||
943 | * buddy. | ||
944 | */ | ||
945 | falloc = balloc_free_fixed(a, addr); | ||
946 | if (falloc) { | ||
947 | __balloc_do_free_fixed(a, falloc); | ||
948 | goto done; | ||
949 | } | ||
950 | |||
951 | bud = balloc_free_buddy(a, addr); | ||
952 | if (!bud) | ||
953 | goto done; | ||
954 | |||
955 | balloc_blist_add(a, bud); | ||
956 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
957 | |||
958 | /* | ||
959 | * Attemp to defrag the allocation. | ||
960 | */ | ||
961 | balloc_coalesce(a, bud); | ||
962 | |||
963 | done: | ||
964 | alloc_unlock(__a); | ||
965 | alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); | ||
966 | gk20a_alloc_trace_func_done(); | ||
967 | return; | ||
968 | } | ||
969 | |||
970 | static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) | ||
971 | { | ||
972 | struct gk20a_buddy_allocator *ba = a->priv; | ||
973 | |||
974 | return ba->length; | ||
975 | } | ||
976 | |||
977 | static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) | ||
978 | { | ||
979 | struct gk20a_buddy_allocator *ba = a->priv; | ||
980 | |||
981 | return ba->start; | ||
982 | } | ||
983 | |||
984 | static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) | ||
985 | { | ||
986 | struct gk20a_buddy_allocator *ba = a->priv; | ||
987 | |||
988 | return ba->initialized; | ||
989 | } | ||
990 | |||
991 | static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) | ||
992 | { | ||
993 | struct gk20a_buddy_allocator *ba = a->priv; | ||
994 | |||
995 | return ba->end; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Print the buddy allocator top level stats. If you pass @s as NULL then the | ||
1000 | * stats are printed to the kernel log. This lets this code be used for | ||
1001 | * debugging purposes internal to the allocator. | ||
1002 | */ | ||
1003 | static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, | ||
1004 | struct seq_file *s, int lock) | ||
1005 | { | ||
1006 | int i; | ||
1007 | struct rb_node *node; | ||
1008 | struct gk20a_fixed_alloc *falloc; | ||
1009 | struct gk20a_buddy_allocator *a = __a->priv; | ||
1010 | |||
1011 | __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", | ||
1012 | a->base, a->length, a->blk_size); | ||
1013 | __alloc_pstat(s, __a, "Internal params:\n"); | ||
1014 | __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); | ||
1015 | __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); | ||
1016 | __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); | ||
1017 | __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); | ||
1018 | __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); | ||
1019 | |||
1020 | __alloc_pstat(s, __a, "Buddy blocks:\n"); | ||
1021 | __alloc_pstat(s, __a, " Order Free Alloced Split\n"); | ||
1022 | __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); | ||
1023 | |||
1024 | if (lock) | ||
1025 | alloc_lock(__a); | ||
1026 | for (i = a->max_order; i >= 0; i--) { | ||
1027 | if (a->buddy_list_len[i] == 0 && | ||
1028 | a->buddy_list_alloced[i] == 0 && | ||
1029 | a->buddy_list_split[i] == 0) | ||
1030 | continue; | ||
1031 | |||
1032 | __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, | ||
1033 | a->buddy_list_len[i], | ||
1034 | a->buddy_list_alloced[i], | ||
1035 | a->buddy_list_split[i]); | ||
1036 | } | ||
1037 | |||
1038 | __alloc_pstat(s, __a, "\n"); | ||
1039 | |||
1040 | for (node = rb_first(&a->fixed_allocs), i = 1; | ||
1041 | node != NULL; | ||
1042 | node = rb_next(node)) { | ||
1043 | falloc = container_of(node, | ||
1044 | struct gk20a_fixed_alloc, alloced_entry); | ||
1045 | |||
1046 | __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", | ||
1047 | i, falloc->start, falloc->end); | ||
1048 | } | ||
1049 | |||
1050 | __alloc_pstat(s, __a, "\n"); | ||
1051 | __alloc_pstat(s, __a, "Bytes allocated: %llu\n", | ||
1052 | a->bytes_alloced); | ||
1053 | __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", | ||
1054 | a->bytes_alloced_real); | ||
1055 | __alloc_pstat(s, __a, "Bytes freed: %llu\n", | ||
1056 | a->bytes_freed); | ||
1057 | |||
1058 | if (lock) | ||
1059 | alloc_unlock(__a); | ||
1060 | } | ||
1061 | |||
1062 | static const struct gk20a_allocator_ops buddy_ops = { | ||
1063 | .alloc = gk20a_buddy_balloc, | ||
1064 | .free = gk20a_buddy_bfree, | ||
1065 | |||
1066 | .alloc_fixed = gk20a_balloc_fixed_buddy, | ||
1067 | /* .free_fixed not needed. */ | ||
1068 | |||
1069 | .base = gk20a_buddy_alloc_base, | ||
1070 | .length = gk20a_buddy_alloc_length, | ||
1071 | .end = gk20a_buddy_alloc_end, | ||
1072 | .inited = gk20a_buddy_alloc_inited, | ||
1073 | |||
1074 | .fini = gk20a_buddy_allocator_destroy, | ||
1075 | |||
1076 | .print_stats = gk20a_buddy_print_stats, | ||
1077 | }; | ||
1078 | |||
1079 | /* | ||
1080 | * Initialize a buddy allocator. Returns 0 on success. This allocator does | ||
1081 | * not necessarily manage bytes. It manages distinct ranges of resources. This | ||
1082 | * allows the allocator to work for things like comp_tags, semaphores, etc. | ||
1083 | * | ||
1084 | * @allocator: Ptr to an allocator struct to init. | ||
1085 | * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to | ||
1086 | * get PTE size for GVA spaces. | ||
1087 | * @name: Name of the allocator. Doesn't have to be static storage. | ||
1088 | * @base: The base address of the resource pool being managed. | ||
1089 | * @size: Number of resources in the pool. | ||
1090 | * @blk_size: Minimum number of resources to allocate at once. For things like | ||
1091 | * semaphores this is 1. For GVA this might be as much as 64k. This | ||
1092 | * corresponds to order 0. Must be power of 2. | ||
1093 | * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator | ||
1094 | * will try and pick a reasonable max order. | ||
1095 | * @flags: Extra flags necessary. See GPU_BALLOC_*. | ||
1096 | */ | ||
1097 | int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a, | ||
1098 | struct vm_gk20a *vm, const char *name, | ||
1099 | u64 base, u64 size, u64 blk_size, | ||
1100 | u64 max_order, u64 flags) | ||
1101 | { | ||
1102 | int err; | ||
1103 | struct gk20a_buddy_allocator *a; | ||
1104 | |||
1105 | /* blk_size must be greater than 0 and a power of 2. */ | ||
1106 | if (blk_size == 0) | ||
1107 | return -EINVAL; | ||
1108 | if (blk_size & (blk_size - 1)) | ||
1109 | return -EINVAL; | ||
1110 | |||
1111 | if (max_order > GPU_BALLOC_MAX_ORDER) | ||
1112 | return -EINVAL; | ||
1113 | |||
1114 | /* If this is to manage a GVA space we need a VM. */ | ||
1115 | if (flags & GPU_BALLOC_GVA_SPACE && !vm) | ||
1116 | return -EINVAL; | ||
1117 | |||
1118 | a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); | ||
1119 | if (!a) | ||
1120 | return -ENOMEM; | ||
1121 | |||
1122 | err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops); | ||
1123 | if (err) | ||
1124 | goto fail; | ||
1125 | |||
1126 | a->base = base; | ||
1127 | a->length = size; | ||
1128 | a->blk_size = blk_size; | ||
1129 | a->blk_shift = __ffs(blk_size); | ||
1130 | a->owner = __a; | ||
1131 | |||
1132 | /* | ||
1133 | * If base is 0 then modfy base to be the size of one block so that we | ||
1134 | * can return errors by returning addr == 0. | ||
1135 | */ | ||
1136 | if (a->base == 0) { | ||
1137 | a->base = a->blk_size; | ||
1138 | a->length -= a->blk_size; | ||
1139 | } | ||
1140 | |||
1141 | a->vm = vm; | ||
1142 | if (flags & GPU_BALLOC_GVA_SPACE) | ||
1143 | a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); | ||
1144 | |||
1145 | a->flags = flags; | ||
1146 | a->max_order = max_order; | ||
1147 | |||
1148 | balloc_allocator_align(a); | ||
1149 | balloc_compute_max_order(a); | ||
1150 | |||
1151 | /* Shared buddy kmem_cache for all allocators. */ | ||
1152 | if (!buddy_cache) | ||
1153 | buddy_cache = KMEM_CACHE(gk20a_buddy, 0); | ||
1154 | if (!buddy_cache) { | ||
1155 | err = -ENOMEM; | ||
1156 | goto fail; | ||
1157 | } | ||
1158 | |||
1159 | a->alloced_buddies = RB_ROOT; | ||
1160 | a->fixed_allocs = RB_ROOT; | ||
1161 | err = balloc_init_lists(a); | ||
1162 | if (err) | ||
1163 | goto fail; | ||
1164 | |||
1165 | a->initialized = 1; | ||
1166 | |||
1167 | gk20a_init_alloc_debug(__a); | ||
1168 | alloc_dbg(__a, "New allocator: type buddy\n"); | ||
1169 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
1170 | alloc_dbg(__a, " size 0x%llx\n", a->length); | ||
1171 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
1172 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | ||
1173 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
1174 | |||
1175 | return 0; | ||
1176 | |||
1177 | fail: | ||
1178 | kfree(a); | ||
1179 | return err; | ||
1180 | } | ||
1181 | |||
1182 | int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, | ||
1183 | u64 base, u64 size, u64 blk_size, u64 flags) | ||
1184 | { | ||
1185 | return __gk20a_buddy_allocator_init(a, NULL, name, | ||
1186 | base, size, blk_size, 0, 0); | ||
1187 | } | ||