Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator.c        |  443
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h   |   70
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c         | 1329
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h    |  192
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator.c      |  207
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h |  121
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c         |  212
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c          |  937
8 files changed, 3511 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
new file mode 100644
index 00000000..6f267c85
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -0,0 +1,443 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/bitops.h>

#include <nvgpu/allocator.h>

#include "bitmap_allocator_priv.h"

static struct kmem_cache *meta_data_cache;	/* slab cache for meta data. */
static DEFINE_MUTEX(meta_data_cache_lock);

static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_bitmap_allocator *ba = a->priv;

	return ba->length;
}

static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_bitmap_allocator *ba = a->priv;

	return ba->base;
}

static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_bitmap_allocator *ba = a->priv;
	int inited = ba->inited;

	rmb();
	return inited;
}

static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_bitmap_allocator *ba = a->priv;

	return ba->base + ba->length;
}

static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
				    u64 base, u64 len)
{
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
	u64 blks, offs, ret;

	/* Compute the bit offset and make sure it's aligned to a block. */
	offs = base >> a->blk_shift;
	if (offs * a->blk_size != base)
		return 0;

	offs -= a->bit_offs;

	blks = len >> a->blk_shift;
	if (blks * a->blk_size != len)
		blks++;

	alloc_lock(__a);

	/* Check if the space requested is already occupied. */
	ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
	if (ret != offs)
		goto fail;

	bitmap_set(a->bitmap, offs, blks);

	a->bytes_alloced += blks * a->blk_size;
	a->nr_fixed_allocs++;
	alloc_unlock(__a);

	alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
		  base, len, blks, blks);
	return base;

fail:
	alloc_unlock(__a);
	alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
	return 0;
}

/*
 * Two possibilities for this function: either we are freeing a fixed
 * allocation or we are freeing a regular alloc but with
 * GPU_ALLOC_NO_ALLOC_PAGE defined.
 *
 * Note: this function won't do much error checking. Thus you could really
 * confuse the allocator if you misuse this function.
 */
static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *__a,
				    u64 base, u64 len)
{
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
	u64 blks, offs;

	offs = base >> a->blk_shift;
	if (WARN_ON(offs * a->blk_size != base))
		return;

	offs -= a->bit_offs;

	blks = len >> a->blk_shift;
	if (blks * a->blk_size != len)
		blks++;

	alloc_lock(__a);
	bitmap_clear(a->bitmap, offs, blks);
	a->bytes_freed += blks * a->blk_size;
	alloc_unlock(__a);

	alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
		  base, len, blks, blks);
}

/*
 * Add the passed alloc to the tree of stored allocations.
 */
static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a,
				  struct nvgpu_bitmap_alloc *alloc)
{
	struct rb_node **new = &a->allocs.rb_node;
	struct rb_node *parent = NULL;
	struct nvgpu_bitmap_alloc *tmp;

	while (*new) {
		tmp = container_of(*new, struct nvgpu_bitmap_alloc,
				   alloc_entry);

		parent = *new;
		if (alloc->base < tmp->base)
			new = &((*new)->rb_left);
		else if (alloc->base > tmp->base)
			new = &((*new)->rb_right);
		else {
			WARN(1, "Duplicate entries in RB alloc tree!\n");
			return;
		}
	}

	rb_link_node(&alloc->alloc_entry, parent, new);
	rb_insert_color(&alloc->alloc_entry, &a->allocs);
}

/*
 * Find and remove meta-data from the outstanding allocations.
 */
static struct nvgpu_bitmap_alloc *find_alloc_metadata(
	struct nvgpu_bitmap_allocator *a, u64 addr)
{
	struct rb_node *node = a->allocs.rb_node;
	struct nvgpu_bitmap_alloc *alloc;

	while (node) {
		alloc = container_of(node, struct nvgpu_bitmap_alloc,
				     alloc_entry);

		if (addr < alloc->base)
			node = node->rb_left;
		else if (addr > alloc->base)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->allocs);

	return alloc;
}

/*
 * The tree of alloc meta-data stores the address of the alloc, not the bit
 * offset.
 */
static int __nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
				      u64 addr, u64 len)
{
	struct nvgpu_bitmap_alloc *alloc =
		kmem_cache_alloc(meta_data_cache, GFP_KERNEL);

	if (!alloc)
		return -ENOMEM;

	alloc->base = addr;
	alloc->length = len;

	insert_alloc_metadata(a, alloc);

	return 0;
}

/*
 * @len is in bytes. This routine will figure out the right number of bits to
 * actually allocate. The return value is the address in bytes as well.
 */
static u64 nvgpu_bitmap_alloc(struct nvgpu_allocator *__a, u64 len)
{
	u64 blks, addr;
	unsigned long offs, adjusted_offs, limit;
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);

	blks = len >> a->blk_shift;

	if (blks * a->blk_size != len)
		blks++;

	alloc_lock(__a);

	/*
	 * First look from next_blk and onwards...
	 */
	offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
					  a->next_blk, blks, 0);
	if (offs >= a->num_bits) {
		/*
		 * If that didn't work try the remaining area. Since there can
		 * be available space that spans across a->next_blk we need to
		 * search up to the first set bit after that.
		 */
		limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
		offs = bitmap_find_next_zero_area(a->bitmap, limit,
						  0, blks, 0);
		if (offs >= a->next_blk)
			goto fail;
	}

	bitmap_set(a->bitmap, offs, blks);
	a->next_blk = offs + blks;

	adjusted_offs = offs + a->bit_offs;
	addr = ((u64)adjusted_offs) * a->blk_size;

	/*
	 * Only do meta-data storage if we are allowed to allocate storage for
	 * that meta-data. The issue with using kmalloc() and friends is that
	 * in latency- and success-critical paths an alloc_page() call can
	 * either sleep for potentially a long time or, assuming GFP_ATOMIC,
	 * fail. Since we might not want either of these possibilities, assume
	 * that the caller will keep what data it needs around to successfully
	 * free this allocation.
	 */
	if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
	    __nvgpu_bitmap_store_alloc(a, addr, blks * a->blk_size))
		goto fail_reset_bitmap;

	alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
		  addr, len, blks, blks);

	a->nr_allocs++;
	a->bytes_alloced += (blks * a->blk_size);
	alloc_unlock(__a);

	return addr;

fail_reset_bitmap:
	bitmap_clear(a->bitmap, offs, blks);
fail:
	a->next_blk = 0;
	alloc_unlock(__a);
	alloc_dbg(__a, "Alloc failed!\n");
	return 0;
}

static void nvgpu_bitmap_free(struct nvgpu_allocator *__a, u64 addr)
{
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
	struct nvgpu_bitmap_alloc *alloc = NULL;
	u64 offs, adjusted_offs, blks;

	alloc_lock(__a);

	if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
		WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
		goto done;
	}

	alloc = find_alloc_metadata(a, addr);
	if (!alloc)
		goto done;

	/*
	 * The address comes from the adjusted offset (i.e. the bit offset
	 * with a->bit_offs added), so start with that and then work out the
	 * real offset into the bitmap.
	 */
	adjusted_offs = addr >> a->blk_shift;
	offs = adjusted_offs - a->bit_offs;
	blks = alloc->length >> a->blk_shift;

	bitmap_clear(a->bitmap, offs, blks);
	alloc_dbg(__a, "Free 0x%-10llx\n", addr);

	a->bytes_freed += alloc->length;

done:
	/* Matches the kmem_cache_alloc() in __nvgpu_bitmap_store_alloc(). */
	if (alloc)
		kmem_cache_free(meta_data_cache, alloc);
	alloc_unlock(__a);
}

static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *__a)
{
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
	struct nvgpu_bitmap_alloc *alloc;
	struct rb_node *node;

	/*
	 * Kill any outstanding allocations.
	 */
	while ((node = rb_first(&a->allocs)) != NULL) {
		alloc = container_of(node, struct nvgpu_bitmap_alloc,
				     alloc_entry);

		rb_erase(node, &a->allocs);
		/* Meta-data nodes come from the slab cache, not kmalloc(). */
		kmem_cache_free(meta_data_cache, alloc);
	}

	kfree(a->bitmap);
	kfree(a);
}

static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *__a,
				     struct seq_file *s, int lock)
{
	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);

	__alloc_pstat(s, __a, "Bitmap allocator params:\n");
	__alloc_pstat(s, __a, "  start = 0x%llx\n", a->base);
	__alloc_pstat(s, __a, "  end   = 0x%llx\n", a->base + a->length);
	__alloc_pstat(s, __a, "  blks  = 0x%llx\n", a->num_bits);

	/* Actual stats. */
	__alloc_pstat(s, __a, "Stats:\n");
	__alloc_pstat(s, __a, "  Number allocs = 0x%llx\n", a->nr_allocs);
	__alloc_pstat(s, __a, "  Number fixed  = 0x%llx\n", a->nr_fixed_allocs);
	__alloc_pstat(s, __a, "  Bytes alloced = 0x%llx\n", a->bytes_alloced);
	__alloc_pstat(s, __a, "  Bytes freed   = 0x%llx\n", a->bytes_freed);
	__alloc_pstat(s, __a, "  Outstanding   = 0x%llx\n",
		      a->bytes_alloced - a->bytes_freed);
}

static const struct nvgpu_allocator_ops bitmap_ops = {
	.alloc = nvgpu_bitmap_alloc,
	.free = nvgpu_bitmap_free,

	.alloc_fixed = nvgpu_bitmap_alloc_fixed,
	.free_fixed = nvgpu_bitmap_free_fixed,

	.base = nvgpu_bitmap_alloc_base,
	.length = nvgpu_bitmap_alloc_length,
	.end = nvgpu_bitmap_alloc_end,
	.inited = nvgpu_bitmap_alloc_inited,

	.fini = nvgpu_bitmap_alloc_destroy,

	.print_stats = nvgpu_bitmap_print_stats,
};

int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
				const char *name, u64 base, u64 length,
				u64 blk_size, u64 flags)
{
	int err;
	struct nvgpu_bitmap_allocator *a;

	mutex_lock(&meta_data_cache_lock);
	if (!meta_data_cache)
		meta_data_cache = KMEM_CACHE(nvgpu_bitmap_alloc, 0);
	mutex_unlock(&meta_data_cache_lock);

	if (!meta_data_cache)
		return -ENOMEM;

	/*
	 * blk_size must be a power-of-2; base and length also need to be
	 * aligned to blk_size.
	 */
	if (WARN_ON(blk_size & (blk_size - 1)))
		return -EINVAL;
	if (base & (blk_size - 1) || length & (blk_size - 1))
		return -EINVAL;

	/*
	 * If base is 0, skip the first block so that a successful alloc never
	 * returns 0 (0 is the error return).
	 */
	if (base == 0) {
		base = blk_size;
		length -= blk_size;
	}

	a = kzalloc(sizeof(struct nvgpu_bitmap_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __nvgpu_alloc_common_init(__a, name, a, false, &bitmap_ops);
	if (err)
		goto fail;

	a->base = base;
	a->length = length;
	a->blk_size = blk_size;
	a->blk_shift = __ffs(a->blk_size);
	a->num_bits = length >> a->blk_shift;
	a->bit_offs = a->base >> a->blk_shift;
	a->flags = flags;

	a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap),
			    GFP_KERNEL);
	if (!a->bitmap) {
		err = -ENOMEM;
		goto fail;
	}

	wmb();
	a->inited = true;

	nvgpu_init_alloc_debug(g, __a);
	alloc_dbg(__a, "New allocator: type      bitmap\n");
	alloc_dbg(__a, "               base      0x%llx\n", a->base);
	alloc_dbg(__a, "               bit_offs  0x%llx\n", a->bit_offs);
	alloc_dbg(__a, "               size      0x%llx\n", a->length);
	alloc_dbg(__a, "               blk_size  0x%llx\n", a->blk_size);
	alloc_dbg(__a, "               flags     0x%llx\n", a->flags);

	return 0;

fail:
	kfree(a);
	return err;
}
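
For reference, a minimal usage sketch of the allocator above, not part of this commit: it assumes a valid struct gk20a *g and assumes the generic nvgpu_alloc()/nvgpu_free()/nvgpu_alloc_destroy() wrappers (from nvgpu_allocator.c in this series) dispatch through the bitmap_ops table. The name, base, length, and 4 KiB block size are made up for the example.

static int bitmap_allocator_example(struct gk20a *g)
{
	struct nvgpu_allocator alloc;
	u64 addr;
	int err;

	/* Manage [0x100000, 0x500000) with one bit per 4 KiB block. */
	err = nvgpu_bitmap_allocator_init(g, &alloc, "example",
					  0x100000, 0x400000, 0x1000, 0);
	if (err)
		return err;

	/* An 8 KiB request sets 2 bits; returns a byte address or 0. */
	addr = nvgpu_alloc(&alloc, 0x2000);
	if (addr)
		nvgpu_free(&alloc, addr);

	nvgpu_alloc_destroy(&alloc);
	return 0;
}
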
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
new file mode 100644
index 00000000..9802b9db
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
@@ -0,0 +1,70 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef BITMAP_ALLOCATOR_PRIV_H
#define BITMAP_ALLOCATOR_PRIV_H

#include <linux/rbtree.h>

struct nvgpu_allocator;

struct nvgpu_bitmap_allocator {
	struct nvgpu_allocator *owner;

	u64 base;		/* Base address of the space. */
	u64 length;		/* Length of the space. */
	u64 blk_size;		/* Size that corresponds to 1 bit. */
	u64 blk_shift;		/* Bit shift to divide by blk_size. */
	u64 num_bits;		/* Number of allocatable bits. */
	u64 bit_offs;		/* Offset of bitmap. */

	/*
	 * Optimization for making repeated allocations faster. Keep track of
	 * the next bit after the most recent allocation. This is where the
	 * next search will start from. This should make allocation faster in
	 * cases where lots of allocations get made one after another. It
	 * shouldn't have a negative impact on the case where the allocator
	 * is fragmented.
	 */
	u64 next_blk;

	unsigned long *bitmap;	/* The actual bitmap! */
	struct rb_root allocs;	/* Tree of outstanding allocations. */

	u64 flags;

	bool inited;

	/* Statistics */
	u64 nr_allocs;
	u64 nr_fixed_allocs;
	u64 bytes_alloced;
	u64 bytes_freed;
};

struct nvgpu_bitmap_alloc {
	u64 base;
	u64 length;
	struct rb_node alloc_entry;	/* RB tree of allocations. */
};

static inline struct nvgpu_bitmap_allocator *bitmap_allocator(
	struct nvgpu_allocator *a)
{
	return (struct nvgpu_bitmap_allocator *)(a)->priv;
}

#endif /* BITMAP_ALLOCATOR_PRIV_H */
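
To make the blk_shift/bit_offs fields concrete, a worked example with illustrative values (not taken from this commit):

/*
 * Example: blk_size = 0x1000 (4 KiB) gives blk_shift = 12. With
 * base = 0x100000, bit_offs = 0x100000 >> 12 = 0x100. Bit N of the bitmap
 * then covers byte addresses [(N + 0x100) << 12, (N + 0x101) << 12), and a
 * byte address addr maps back to bit (addr >> 12) - 0x100.
 */
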
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
new file mode 100644
index 00000000..39a53801
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -0,0 +1,1329 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>

#include <nvgpu/allocator.h>

#include "gk20a/mm_gk20a.h"
#include "gk20a/platform_gk20a.h"

#include "buddy_allocator_priv.h"

static struct kmem_cache *buddy_cache;	/* slab cache for meta data. */

/* Some other buddy allocator functions. */
static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
					     u64 addr);
static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
			    struct nvgpu_buddy *b);
static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc);

/*
 * This macro is not present in older kernels' list.h code.
 */
#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
#endif

/*
 * GPU buddy allocator for various address spaces.
 *
 * Current limitations:
 *   o  A fixed allocation could potentially be made that borders PDEs with
 *      different PTE sizes. This would require that fixed buffer to have
 *      different sized PTEs for different parts of the allocation. Probably
 *      best to just require PDE alignment for fixed address allocs.
 *
 *   o  It is currently possible to make an allocator that has a buddy
 *      alignment out of sync with the PDE block size alignment. A simple
 *      example is a 32GB address space starting at byte 1. Every buddy is
 *      shifted off by 1, which means each buddy corresponds to more than one
 *      actual GPU page. The best way to fix this is probably to just require
 *      PDE block size alignment for the start of the address space. At the
 *      moment all allocators are easily PDE aligned so this hasn't been a
 *      problem.
 */

/*
 * Pick a suitable maximum order for this allocator.
 *
 * Heuristic: just guess that the best max order is the largest single
 * block that will fit in the address space.
 */
static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a)
{
	u64 true_max_order = ilog2(a->blks);

	if (a->max_order == 0) {
		a->max_order = true_max_order;
		return;
	}

	if (a->max_order > true_max_order)
		a->max_order = true_max_order;
	if (a->max_order > GPU_BALLOC_MAX_ORDER)
		a->max_order = GPU_BALLOC_MAX_ORDER;
}

/*
 * Since we can only allocate in chunks of a->blk_size we need to trim off
 * any excess data that is not aligned to a->blk_size.
 */
static void balloc_allocator_align(struct nvgpu_buddy_allocator *a)
{
	a->start = ALIGN(a->base, a->blk_size);
	WARN_ON(a->start != a->base);
	a->end = (a->base + a->length) & ~(a->blk_size - 1);
	a->count = a->end - a->start;
	a->blks = a->count >> a->blk_shift;
}

/*
 * Pass NULL for parent if you want a top level buddy.
 */
static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a,
					    struct nvgpu_buddy *parent,
					    u64 start, u64 order)
{
	struct nvgpu_buddy *new_buddy;

	new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
	if (!new_buddy)
		return NULL;

	memset(new_buddy, 0, sizeof(struct nvgpu_buddy));

	new_buddy->parent = parent;
	new_buddy->start = start;
	new_buddy->order = order;
	/* 64-bit shift: order may exceed the width of a plain int shift. */
	new_buddy->end = start + (1ULL << order) * a->blk_size;
	new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;

	return new_buddy;
}

static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a,
				    struct nvgpu_buddy *b,
				    struct list_head *list)
{
	if (buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: adding added buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	/*
	 * Add big PTE blocks to the tail, small to the head for GVA spaces.
	 * This lets the code that checks if there are available blocks check
	 * without cycling through the entire list.
	 */
	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    b->pte_size == gmmu_page_size_big)
		list_add_tail(&b->buddy_entry, list);
	else
		list_add(&b->buddy_entry, list);

	buddy_set_in_list(b);
}

static void __balloc_buddy_list_rem(struct nvgpu_buddy_allocator *a,
				    struct nvgpu_buddy *b)
{
	if (!buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: removing removed buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	list_del_init(&b->buddy_entry);
	buddy_clr_in_list(b);
}

/*
 * Add a buddy to one of the buddy lists and deal with the necessary
 * bookkeeping. Adds the buddy to the list specified by the buddy's order.
 */
static void balloc_blist_add(struct nvgpu_buddy_allocator *a,
			     struct nvgpu_buddy *b)
{
	__balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
	a->buddy_list_len[b->order]++;
}

static void balloc_blist_rem(struct nvgpu_buddy_allocator *a,
			     struct nvgpu_buddy *b)
{
	__balloc_buddy_list_rem(a, b);
	a->buddy_list_len[b->order]--;
}

static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len)
{
	if (len == 0)
		return 0;

	len--;
	len >>= a->blk_shift;

	return fls(len);
}
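
/*
 * Editorial example (values illustrative, not from this commit): with
 * blk_size = 4 KiB (blk_shift = 12), a request of len = 20 KiB computes
 * (20480 - 1) >> 12 = 4 and fls(4) = 3, i.e. an order-3 buddy covering
 * 8 blocks (32 KiB) -- the smallest power-of-two number of blocks that
 * fits the request.
 */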

static u64 __balloc_max_order_in(struct nvgpu_buddy_allocator *a,
				 u64 start, u64 end)
{
	u64 size = (end - start) >> a->blk_shift;

	if (size > 0)
		return min_t(u64, ilog2(size), a->max_order);
	else
		return GPU_BALLOC_MAX_ORDER;
}

/*
 * Initialize the buddy lists.
 */
static int balloc_init_lists(struct nvgpu_buddy_allocator *a)
{
	int i;
	u64 bstart, bend, order;
	struct nvgpu_buddy *buddy;

	bstart = a->start;
	bend = a->end;

	/* First make sure the LLs are valid. */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
		INIT_LIST_HEAD(balloc_get_order_list(a, i));

	while (bstart < bend) {
		order = __balloc_max_order_in(a, bstart, bend);

		buddy = balloc_new_buddy(a, NULL, bstart, order);
		if (!buddy)
			goto cleanup;

		balloc_blist_add(a, buddy);
		bstart += balloc_order_to_len(a, order);
	}

	return 0;

cleanup:
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		/*
		 * A list can hold more than one buddy of the same order, so
		 * drain each list completely rather than freeing only the
		 * first entry.
		 */
		while (!list_empty(balloc_get_order_list(a, i))) {
			buddy = list_first_entry(balloc_get_order_list(a, i),
					struct nvgpu_buddy, buddy_entry);
			balloc_blist_rem(a, buddy);
			kmem_cache_free(buddy_cache, buddy);
		}
	}

	return -ENOMEM;
}

/*
 * Clean up and destroy the passed allocator.
 */
static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
{
	int i;
	struct rb_node *node;
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	alloc_lock(__a);

	nvgpu_fini_alloc_debug(__a);

	/*
	 * Free the fixed allocs first.
	 */
	while ((node = rb_first(&a->fixed_allocs)) != NULL) {
		falloc = container_of(node,
				      struct nvgpu_fixed_alloc, alloced_entry);

		rb_erase(node, &a->fixed_allocs);
		__balloc_do_free_fixed(a, falloc);
	}

	/*
	 * And now free all outstanding allocations.
	 */
	while ((node = rb_first(&a->alloced_buddies)) != NULL) {
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);
		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		balloc_coalesce(a, bud);
	}

	/*
	 * Now clean up the unallocated buddies.
	 */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		BUG_ON(a->buddy_list_alloced[i] != 0);

		while (!list_empty(balloc_get_order_list(a, i))) {
			bud = list_first_entry(balloc_get_order_list(a, i),
					struct nvgpu_buddy, buddy_entry);
			balloc_blist_rem(a, bud);
			kmem_cache_free(buddy_cache, bud);
		}

		if (a->buddy_list_len[i] != 0) {
			pr_info("Excess buddies!!! (%d: %llu)\n",
				i, a->buddy_list_len[i]);
			BUG();
		}
		if (a->buddy_list_split[i] != 0) {
			pr_info("Excess split nodes!!! (%d: %llu)\n",
				i, a->buddy_list_split[i]);
			BUG();
		}
		if (a->buddy_list_alloced[i] != 0) {
			pr_info("Excess alloced nodes!!! (%d: %llu)\n",
				i, a->buddy_list_alloced[i]);
			BUG();
		}
	}

	kfree(a);

	alloc_unlock(__a);
}

/*
 * Combine the passed buddy if possible. The pointer in @b may not be valid
 * after this as the buddy may be freed.
 *
 * @a must be locked.
 */
static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
			    struct nvgpu_buddy *b)
{
	struct nvgpu_buddy *parent;

	if (buddy_is_alloced(b) || buddy_is_split(b))
		return;

	/*
	 * If my buddy and I are both not allocated and not split then we can
	 * coalesce into our parent.
	 */
	if (!b->buddy)
		return;
	if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
		return;

	parent = b->parent;

	balloc_blist_rem(a, b);
	balloc_blist_rem(a, b->buddy);

	buddy_clr_split(parent);
	a->buddy_list_split[parent->order]--;
	balloc_blist_add(a, parent);

	/*
	 * Recursively coalesce as far as we can go.
	 */
	balloc_coalesce(a, parent);

	/* Clean up the remains. */
	kmem_cache_free(buddy_cache, b->buddy);
	kmem_cache_free(buddy_cache, b);
}

/*
 * Split a buddy into two new buddies, each half the size of the parent.
 *
 * @a must be locked.
 */
static int balloc_split_buddy(struct nvgpu_buddy_allocator *a,
			      struct nvgpu_buddy *b, int pte_size)
{
	struct nvgpu_buddy *left, *right;
	u64 half;

	left = balloc_new_buddy(a, b, b->start, b->order - 1);
	if (!left)
		return -ENOMEM;

	half = (b->end - b->start) / 2;

	right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
	if (!right) {
		kmem_cache_free(buddy_cache, left);
		return -ENOMEM;
	}

	buddy_set_split(b);
	a->buddy_list_split[b->order]++;

	b->left = left;
	b->right = right;
	left->buddy = right;
	right->buddy = left;
	left->parent = b;
	right->parent = b;

	/* PTE considerations. */
	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    left->order <= a->pte_blk_order) {
		left->pte_size = pte_size;
		right->pte_size = pte_size;
	}

	balloc_blist_rem(a, b);
	balloc_blist_add(a, left);
	balloc_blist_add(a, right);

	return 0;
}

/*
 * Place the passed buddy into the RB tree for allocated buddies. Never fails
 * unless the passed entry is a duplicate, which is a bug.
 *
 * @a must be locked.
 */
static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a,
			       struct nvgpu_buddy *b)
{
	struct rb_node **new = &(a->alloced_buddies.rb_node);
	struct rb_node *parent = NULL;

	while (*new) {
		struct nvgpu_buddy *bud = container_of(*new, struct nvgpu_buddy,
						       alloced_entry);

		parent = *new;
		if (b->start < bud->start)
			new = &((*new)->rb_left);
		else if (b->start > bud->start)
			new = &((*new)->rb_right);
		else
			BUG_ON("Duplicate entries in allocated list!\n");
	}

	rb_link_node(&b->alloced_entry, parent, new);
	rb_insert_color(&b->alloced_entry, &a->alloced_buddies);

	buddy_set_alloced(b);
	a->buddy_list_alloced[b->order]++;
}

/*
 * Remove the passed buddy from the allocated buddy RB tree. Returns the
 * deallocated buddy for further processing.
 *
 * @a must be locked.
 */
static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
					     u64 addr)
{
	struct rb_node *node = a->alloced_buddies.rb_node;
	struct nvgpu_buddy *bud;

	while (node) {
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);

		if (addr < bud->start)
			node = node->rb_left;
		else if (addr > bud->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->alloced_buddies);
	buddy_clr_alloced(bud);
	a->buddy_list_alloced[bud->order]--;

	return bud;
}

/*
 * Find a suitable buddy for the given order and PTE type (big or little).
 */
static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
					       u64 order, int pte_size)
{
	struct nvgpu_buddy *bud;

	if (order > a->max_order ||
	    list_empty(balloc_get_order_list(a, order)))
		return NULL;

	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    pte_size == gmmu_page_size_big)
		bud = list_last_entry(balloc_get_order_list(a, order),
				      struct nvgpu_buddy, buddy_entry);
	else
		bud = list_first_entry(balloc_get_order_list(a, order),
				       struct nvgpu_buddy, buddy_entry);

	if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
	    bud->pte_size != pte_size)
		return NULL;

	return bud;
}

/*
 * Allocate a suitably sized buddy. If no suitable buddy exists, split higher
 * order buddies until we have a suitable buddy to allocate.
 *
 * For PDE grouping add an extra check to see if a buddy is suitable: that the
 * buddy exists in a PDE whose PTE size is reasonable.
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a,
			     u64 order, int pte_size)
{
	u64 split_order;
	struct nvgpu_buddy *bud = NULL;

	split_order = order;
	while (split_order <= a->max_order &&
	       !(bud = __balloc_find_buddy(a, split_order, pte_size)))
		split_order++;

	/* Out of memory! */
	if (!bud)
		return 0;

	while (bud->order != order) {
		if (balloc_split_buddy(a, bud, pte_size))
			return 0; /* No mem... */
		bud = bud->left;
	}

	balloc_blist_rem(a, bud);
	balloc_alloc_buddy(a, bud);

	return bud->start;
}

/*
 * See if the passed range is actually available for allocation. If so, then
 * return 1, otherwise return 0.
 *
 * TODO: right now this uses the suboptimal approach of going through all
 * outstanding allocations and checking their base/ends. This could be better.
 */
static int balloc_is_range_free(struct nvgpu_buddy_allocator *a,
				u64 base, u64 end)
{
	struct rb_node *node;
	struct nvgpu_buddy *bud;

	node = rb_first(&a->alloced_buddies);
	if (!node)
		return 1; /* No allocs yet. */

	bud = container_of(node, struct nvgpu_buddy, alloced_entry);

	while (bud->start < end) {
		/*
		 * bud->start < end is guaranteed by the loop condition, so a
		 * standard interval-overlap test reduces to checking the
		 * other side. This also catches buddies that fully contain
		 * or exactly match [base, end).
		 */
		if (bud->end > base)
			return 0;

		node = rb_next(node);
		if (!node)
			break;
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);
	}

	return 1;
}

static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a,
			       struct nvgpu_fixed_alloc *f)
{
	struct rb_node **new = &(a->fixed_allocs.rb_node);
	struct rb_node *parent = NULL;

	while (*new) {
		struct nvgpu_fixed_alloc *falloc =
			container_of(*new, struct nvgpu_fixed_alloc,
				     alloced_entry);

		BUG_ON(!virt_addr_valid(falloc));

		parent = *new;
		if (f->start < falloc->start)
			new = &((*new)->rb_left);
		else if (f->start > falloc->start)
			new = &((*new)->rb_right);
		else
			BUG_ON("Duplicate entries in allocated list!\n");
	}

	rb_link_node(&f->alloced_entry, parent, new);
	rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
}

/*
 * Find and remove the fixed alloc containing @addr from the fixed-alloc RB
 * tree. Returns the alloc for further processing.
 *
 * @a must be locked.
 */
static struct nvgpu_fixed_alloc *balloc_free_fixed(
	struct nvgpu_buddy_allocator *a, u64 addr)
{
	struct rb_node *node = a->fixed_allocs.rb_node;
	struct nvgpu_fixed_alloc *falloc;

	while (node) {
		falloc = container_of(node,
				      struct nvgpu_fixed_alloc, alloced_entry);

		if (addr < falloc->start)
			node = node->rb_left;
		else if (addr > falloc->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->fixed_allocs);

	return falloc;
}

/*
 * Find the parent range - doesn't necessarily need the parent to actually
 * exist as a buddy. Finding an existing parent comes later...
 */
static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
				      u64 base, u64 order,
				      u64 *pbase, u64 *porder)
{
	u64 base_mask;
	u64 shifted_base = balloc_base_shift(a, base);

	order++;
	base_mask = ~((a->blk_size << order) - 1);

	shifted_base &= base_mask;

	*pbase = balloc_base_unshift(a, shifted_base);
	*porder = order;
}

/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 */
static struct nvgpu_buddy *__balloc_make_fixed_buddy(
	struct nvgpu_buddy_allocator *a, u64 base, u64 order)
{
	struct nvgpu_buddy *bud = NULL;
	struct list_head *order_list;
	u64 cur_order = order, cur_base = base;

	/*
	 * Algo:
	 *  1. Keep jumping up a buddy order until we find the real buddy that
	 *     this buddy exists in.
	 *  2. Then work our way down through the buddy tree until we hit a
	 *     dead end.
	 *  3. Start splitting buddies until we split to the one we need to
	 *     make.
	 */
	while (cur_order <= a->max_order) {
		int found = 0;

		order_list = balloc_get_order_list(a, cur_order);
		list_for_each_entry(bud, order_list, buddy_entry) {
			if (bud->start == cur_base) {
				found = 1;
				break;
			}
		}

		if (found)
			break;

		__balloc_get_parent_range(a, cur_base, cur_order,
					  &cur_base, &cur_order);
	}

	if (cur_order > a->max_order) {
		alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
		return NULL;
	}

	/* Split this buddy as necessary until we get the target buddy. */
	while (bud->start != base || bud->order != order) {
		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
			balloc_coalesce(a, bud);
			return NULL;
		}

		if (base < bud->right->start)
			bud = bud->left;
		else
			bud = bud->right;
	}

	return bud;
}
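
/*
 * Editorial example of the routine above (values illustrative, not from
 * this commit): to make an order-0 buddy at base 0x6000 (blk_size = 4 KiB)
 * in a fresh 32 KiB space holding a single free order-3 buddy at 0x0,
 * step 1 climbs the parent ranges 0x6000/0 -> 0x6000/1 -> 0x4000/2 ->
 * 0x0/3 until it finds that free buddy. The split loop then descends
 * right, right, left -- three splits -- and returns the order-0 buddy
 * spanning [0x6000, 0x7000).
 */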

static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc,
				   u64 base, u64 len)
{
	u64 shifted_base, inc_base;
	u64 align_order;

	shifted_base = balloc_base_shift(a, base);
	if (shifted_base == 0)
		align_order = __fls(len >> a->blk_shift);
	else
		align_order = min_t(u64,
				    __ffs(shifted_base >> a->blk_shift),
				    __fls(len >> a->blk_shift));

	if (align_order > a->max_order) {
		alloc_dbg(balloc_owner(a),
			  "Align order too big: %llu > %llu\n",
			  align_order, a->max_order);
		return 0;
	}

	/*
	 * Generate a list of buddies that satisfy this allocation.
	 */
	inc_base = shifted_base;
	while (inc_base < (shifted_base + len)) {
		u64 order_len = balloc_order_to_len(a, align_order);
		u64 remaining;
		struct nvgpu_buddy *bud;

		bud = __balloc_make_fixed_buddy(a,
					balloc_base_unshift(a, inc_base),
					align_order);
		if (!bud) {
			alloc_dbg(balloc_owner(a),
				  "Fixed buddy failed: {0x%llx, %llu}!\n",
				  balloc_base_unshift(a, inc_base),
				  align_order);
			goto err_and_cleanup;
		}

		balloc_blist_rem(a, bud);
		balloc_alloc_buddy(a, bud);
		__balloc_buddy_list_add(a, bud, &falloc->buddies);

		/* Bookkeeping. */
		inc_base += order_len;
		remaining = (shifted_base + len) - inc_base;
		align_order = __ffs(inc_base >> a->blk_shift);

		/* If we don't have much left - trim down align_order. */
		if (balloc_order_to_len(a, align_order) > remaining)
			align_order = __balloc_max_order_in(a, inc_base,
							inc_base + remaining);
	}

	return base;

err_and_cleanup:
	while (!list_empty(&falloc->buddies)) {
		struct nvgpu_buddy *bud = list_first_entry(&falloc->buddies,
							   struct nvgpu_buddy,
							   buddy_entry);

		__balloc_buddy_list_rem(a, bud);
		balloc_free_buddy(a, bud->start);
		kmem_cache_free(buddy_cache, bud);
	}

	return 0;
}

static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc)
{
	struct nvgpu_buddy *bud;

	while (!list_empty(&falloc->buddies)) {
		bud = list_first_entry(&falloc->buddies,
				       struct nvgpu_buddy,
				       buddy_entry);
		__balloc_buddy_list_rem(a, bud);

		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		a->bytes_freed += balloc_order_to_len(a, bud->order);

		/*
		 * Attempt to defrag the allocation.
		 */
		balloc_coalesce(a, bud);
	}

	kfree(falloc);
}

/*
 * Allocate memory from the passed allocator.
 */
static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
{
	u64 order, addr;
	int pte_size;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	alloc_lock(__a);

	order = balloc_get_order(a, len);

	if (order > a->max_order) {
		alloc_unlock(__a);
		alloc_dbg(balloc_owner(a), "Alloc fail\n");
		nvgpu_alloc_trace_func_done();
		return 0;
	}

	/*
	 * For now pass the base address of the allocator's region to
	 * __get_pte_size(). This ensures we get the right page size for
	 * the alloc but we don't have to know what the real address is
	 * going to be quite yet.
	 *
	 * TODO: once userspace supports a unified address space pass 0 for
	 * the base. This will make only 'len' affect the PTE size.
	 */
	if (a->flags & GPU_ALLOC_GVA_SPACE)
		pte_size = __get_pte_size(a->vm, a->base, len);
	else
		pte_size = BALLOC_PTE_SIZE_ANY;

	addr = __balloc_do_alloc(a, order, pte_size);

	if (addr) {
		a->bytes_alloced += len;
		a->bytes_alloced_real += balloc_order_to_len(a, order);
		alloc_dbg(balloc_owner(a),
			  "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
			  addr, order, len,
			  pte_size == gmmu_page_size_big ? "big" :
			  pte_size == gmmu_page_size_small ? "small" :
			  "NA/any");
	} else {
		alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
	}

	a->alloc_made = 1;

	alloc_unlock(__a);

	nvgpu_alloc_trace_func_done();
	return addr;
}

/*
 * Requires @__a to be locked.
 */
static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
				      u64 base, u64 len)
{
	u64 ret, real_bytes = 0;
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc = NULL;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	/* If base isn't aligned to an order 0 block, fail. */
	if (base & (a->blk_size - 1))
		goto fail;

	if (len == 0)
		goto fail;

	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
	if (!falloc)
		goto fail;

	INIT_LIST_HEAD(&falloc->buddies);
	falloc->start = base;
	falloc->end = base + len;

	if (!balloc_is_range_free(a, base, base + len)) {
		alloc_dbg(balloc_owner(a),
			  "Range not free: 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail;
	}

	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
	if (!ret) {
		alloc_dbg(balloc_owner(a),
			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail;
	}

	balloc_alloc_fixed(a, falloc);

	list_for_each_entry(bud, &falloc->buddies, buddy_entry)
		real_bytes += (bud->end - bud->start);

	a->bytes_alloced += len;
	a->bytes_alloced_real += real_bytes;

	alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

	nvgpu_alloc_trace_func_done();
	return base;

fail:
	/*
	 * The caller holds the allocator lock, so do not unlock on the error
	 * path here; doing so would lead to a double unlock.
	 */
	kfree(falloc);
	nvgpu_alloc_trace_func_done();
	return 0;
}

/*
 * Allocate a fixed address allocation. The address of the allocation is @base
 * and the length is @len. This is not a typical buddy allocator operation and
 * as such has a high possibility of failure if the address space is heavily
 * in use.
 *
 * Please do not use this function unless _absolutely_ necessary.
 */
static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
				    u64 base, u64 len)
{
	u64 alloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	alloc_lock(__a);
	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
	a->alloc_made = 1;
	alloc_unlock(__a);

	return alloc;
}
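
/*
 * Usage sketch (editorial, not part of this commit): reserving a region
 * that must sit at a known offset. Assumes the generic nvgpu_alloc_fixed()
 * and nvgpu_free() wrappers from this series dispatch to the ops below;
 * the allocator pointer and addresses are made up.
 *
 *	u64 va = nvgpu_alloc_fixed(vma, 0x200000, 0x10000);
 *	if (va)
 *		nvgpu_free(vma, va);	-- frees via the fixed-alloc path
 */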

/*
 * Free the passed allocation.
 */
static void nvgpu_buddy_bfree(struct nvgpu_allocator *__a, u64 addr)
{
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	if (!addr) {
		nvgpu_alloc_trace_func_done();
		return;
	}

	alloc_lock(__a);

	/*
	 * First see if this is a fixed alloc. If not fall back to a regular
	 * buddy.
	 */
	falloc = balloc_free_fixed(a, addr);
	if (falloc) {
		__balloc_do_free_fixed(a, falloc);
		goto done;
	}

	bud = balloc_free_buddy(a, addr);
	if (!bud)
		goto done;

	balloc_blist_add(a, bud);
	a->bytes_freed += balloc_order_to_len(a, bud->order);

	/*
	 * Attempt to defrag the allocation.
	 */
	balloc_coalesce(a, bud);

done:
	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
	nvgpu_alloc_trace_func_done();
}

static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
					    struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_alloc_carveout *tmp;
	u64 co_base, co_end;

	co_base = co->base;
	co_end = co->base + co->length;

	/*
	 * Not the fastest approach but we should not have that many carveouts
	 * for any reasonable allocator. Use a standard interval-overlap test
	 * so that a carveout fully containing an existing one is rejected
	 * as well.
	 */
	list_for_each_entry(tmp, &a->co_list, co_entry) {
		if (co_base < (tmp->base + tmp->length) &&
		    co_end > tmp->base)
			return false;
	}

	return true;
}

/*
 * Carveouts can only be reserved before any regular allocations have been
 * made.
 */
static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
				  struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_buddy_allocator *a = __a->priv;
	u64 addr;
	int err = 0;

	if (co->base < a->start || (co->base + co->length) > a->end ||
	    a->alloc_made)
		return -EINVAL;

	alloc_lock(__a);

	if (!nvgpu_buddy_reserve_is_possible(a, co)) {
		err = -EBUSY;
		goto done;
	}

	/* Should not be possible to fail... */
	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
	if (!addr) {
		err = -ENOMEM;
		pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
		goto done;
	}

	list_add(&co->co_entry, &a->co_list);

done:
	alloc_unlock(__a);
	return err;
}

/*
 * Carveouts can be released at any time.
 */
static void nvgpu_buddy_release_co(struct nvgpu_allocator *__a,
				   struct nvgpu_alloc_carveout *co)
{
	alloc_lock(__a);

	list_del_init(&co->co_entry);
	nvgpu_free(__a, co->base);

	alloc_unlock(__a);
}
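
/*
 * Usage sketch (editorial, not part of this commit): carveouts must be
 * reserved before the first regular allocation. Assumes the
 * nvgpu_alloc_reserve_carveout()/nvgpu_alloc_release_carveout() wrappers
 * from this series; the name, base, and length are made up.
 *
 *	static struct nvgpu_alloc_carveout bootstrap_co = {
 *		.name = "bootstrap",
 *		.base = 0x100000,
 *		.length = SZ_64K,
 *	};
 *
 *	err = nvgpu_alloc_reserve_carveout(&a, &bootstrap_co);
 *	...
 *	nvgpu_alloc_release_carveout(&a, &bootstrap_co);
 */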

static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->length;
}

static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->start;
}

static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;
	int inited = ba->initialized;

	rmb();
	return inited;
}

static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->end;
}

static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;
	u64 space;

	alloc_lock(a);
	space = ba->end - ba->start -
		(ba->bytes_alloced_real - ba->bytes_freed);
	alloc_unlock(a);

	return space;
}

/*
 * Print the buddy allocator top level stats. If you pass @s as NULL then the
 * stats are printed to the kernel log. This lets this code be used for
 * debugging purposes internal to the allocator.
 */
static void nvgpu_buddy_print_stats(struct nvgpu_allocator *__a,
				    struct seq_file *s, int lock)
{
	int i = 0;
	struct rb_node *node;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_alloc_carveout *tmp;
	struct nvgpu_buddy_allocator *a = __a->priv;

	__alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
		      a->base, a->length, a->blk_size);
	__alloc_pstat(s, __a, "Internal params:\n");
	__alloc_pstat(s, __a, "  start = 0x%llx\n", a->start);
	__alloc_pstat(s, __a, "  end   = 0x%llx\n", a->end);
	__alloc_pstat(s, __a, "  count = 0x%llx\n", a->count);
	__alloc_pstat(s, __a, "  blks  = 0x%llx\n", a->blks);
	__alloc_pstat(s, __a, "  max_order = %llu\n", a->max_order);

	if (lock)
		alloc_lock(__a);

	if (!list_empty(&a->co_list)) {
		__alloc_pstat(s, __a, "\n");
		__alloc_pstat(s, __a, "Carveouts:\n");
		list_for_each_entry(tmp, &a->co_list, co_entry)
			__alloc_pstat(s, __a,
				      "  CO %2d: %-20s 0x%010llx + 0x%llx\n",
				      i++, tmp->name, tmp->base, tmp->length);
	}

	__alloc_pstat(s, __a, "\n");
	__alloc_pstat(s, __a, "Buddy blocks:\n");
	__alloc_pstat(s, __a, "  Order   Free    Alloced   Split\n");
	__alloc_pstat(s, __a, "  -----   ----    -------   -----\n");

	for (i = a->max_order; i >= 0; i--) {
		if (a->buddy_list_len[i] == 0 &&
		    a->buddy_list_alloced[i] == 0 &&
		    a->buddy_list_split[i] == 0)
			continue;

		__alloc_pstat(s, __a, "  %3d     %-7llu %-9llu %llu\n", i,
			      a->buddy_list_len[i],
			      a->buddy_list_alloced[i],
			      a->buddy_list_split[i]);
	}

	__alloc_pstat(s, __a, "\n");

1162 | for (node = rb_first(&a->fixed_allocs), i = 1; | ||
1163 | node != NULL; | ||
1164 | node = rb_next(node)) { | ||
1165 | falloc = container_of(node, | ||
1166 | struct nvgpu_fixed_alloc, alloced_entry); | ||
1167 | |||
1168 | __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", | ||
1169 | i, falloc->start, falloc->end); | ||
1170 | } | ||
1171 | |||
1172 | __alloc_pstat(s, __a, "\n"); | ||
1173 | __alloc_pstat(s, __a, "Bytes allocated: %llu\n", | ||
1174 | a->bytes_alloced); | ||
1175 | __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", | ||
1176 | a->bytes_alloced_real); | ||
1177 | __alloc_pstat(s, __a, "Bytes freed: %llu\n", | ||
1178 | a->bytes_freed); | ||
1179 | |||
1180 | if (lock) | ||
1181 | alloc_unlock(__a); | ||
1182 | } | ||
1183 | |||
1184 | static const struct nvgpu_allocator_ops buddy_ops = { | ||
1185 | .alloc = nvgpu_buddy_balloc, | ||
1186 | .free = nvgpu_buddy_bfree, | ||
1187 | |||
1188 | .alloc_fixed = nvgpu_balloc_fixed_buddy, | ||
1189 | /* .free_fixed not needed. */ | ||
1190 | |||
1191 | .reserve_carveout = nvgpu_buddy_reserve_co, | ||
1192 | .release_carveout = nvgpu_buddy_release_co, | ||
1193 | |||
1194 | .base = nvgpu_buddy_alloc_base, | ||
1195 | .length = nvgpu_buddy_alloc_length, | ||
1196 | .end = nvgpu_buddy_alloc_end, | ||
1197 | .inited = nvgpu_buddy_alloc_inited, | ||
1198 | .space = nvgpu_buddy_alloc_space, | ||
1199 | |||
1200 | .fini = nvgpu_buddy_allocator_destroy, | ||
1201 | |||
1202 | .print_stats = nvgpu_buddy_print_stats, | ||
1203 | }; | ||
1204 | |||
1205 | /* | ||
1206 | * Initialize a buddy allocator. Returns 0 on success. This allocator does | ||
1207 | * not necessarily manage bytes. It manages distinct ranges of resources. This | ||
1208 | * allows the allocator to work for things like comp_tags, semaphores, etc. | ||
1209 | * | ||
1210 | * @__a: Ptr to an allocator struct to init. | ||
1211 | * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to | ||
1212 | * get PTE size for GVA spaces. | ||
1213 | * @name: Name of the allocator. Doesn't have to be static storage. | ||
1214 | * @base: The base address of the resource pool being managed. | ||
1215 | * @size: Number of resources in the pool. | ||
1216 | * @blk_size: Minimum number of resources to allocate at once. For things like | ||
1217 | * semaphores this is 1. For GVA this might be as much as 64k. This | ||
1218 | * corresponds to order 0. Must be a power of 2. | ||
1219 | * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator | ||
1220 | * will try to pick a reasonable max order. | ||
1221 | * @flags: Extra flags necessary. See GPU_BALLOC_*. | ||
1222 | */ | ||
1223 | int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | ||
1224 | struct vm_gk20a *vm, const char *name, | ||
1225 | u64 base, u64 size, u64 blk_size, | ||
1226 | u64 max_order, u64 flags) | ||
1227 | { | ||
1228 | int err; | ||
1229 | u64 pde_size; | ||
1230 | struct nvgpu_buddy_allocator *a; | ||
1231 | |||
1232 | /* blk_size must be greater than 0 and a power of 2. */ | ||
1233 | if (blk_size == 0) | ||
1234 | return -EINVAL; | ||
1235 | if (blk_size & (blk_size - 1)) | ||
1236 | return -EINVAL; | ||
1237 | |||
1238 | if (max_order > GPU_BALLOC_MAX_ORDER) | ||
1239 | return -EINVAL; | ||
1240 | |||
1241 | /* If this is to manage a GVA space we need a VM. */ | ||
1242 | if (flags & GPU_ALLOC_GVA_SPACE && !vm) | ||
1243 | return -EINVAL; | ||
1244 | |||
1245 | a = kzalloc(sizeof(struct nvgpu_buddy_allocator), GFP_KERNEL); | ||
1246 | if (!a) | ||
1247 | return -ENOMEM; | ||
1248 | |||
1249 | err = __nvgpu_alloc_common_init(__a, name, a, false, &buddy_ops); | ||
1250 | if (err) | ||
1251 | goto fail; | ||
1252 | |||
1253 | a->base = base; | ||
1254 | a->length = size; | ||
1255 | a->blk_size = blk_size; | ||
1256 | a->blk_shift = __ffs(blk_size); | ||
1257 | a->owner = __a; | ||
1258 | |||
1259 | /* | ||
1260 | * If base is 0 then modify base to be the size of one block so that we | ||
1261 | * can return errors by returning addr == 0. | ||
1262 | */ | ||
1263 | if (a->base == 0) { | ||
1264 | a->base = a->blk_size; | ||
1265 | a->length -= a->blk_size; | ||
1266 | } | ||
1267 | |||
1268 | a->vm = vm; | ||
1269 | if (flags & GPU_ALLOC_GVA_SPACE) { | ||
1270 | pde_size = ((u64)vm->big_page_size) << 10; | ||
1271 | a->pte_blk_order = balloc_get_order(a, pde_size); | ||
1272 | } | ||
1273 | |||
1274 | /* | ||
1275 | * When we have a GVA space with big_pages enabled the size and base | ||
1276 | * must be PDE aligned. If big_pages are not enabled then this | ||
1277 | * requirement is not necessary. | ||
1278 | */ | ||
1279 | if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages && | ||
1280 | ((base | size) & ((vm->big_page_size << 10) - 1))) { | ||
1281 | err = -EINVAL; | ||
1282 | goto fail; | ||
1283 | } | ||
1284 | a->flags = flags; | ||
1285 | a->max_order = max_order; | ||
1286 | |||
1287 | balloc_allocator_align(a); | ||
1288 | balloc_compute_max_order(a); | ||
1289 | |||
1290 | /* Shared buddy kmem_cache for all allocators. */ | ||
1291 | if (!buddy_cache) | ||
1292 | buddy_cache = KMEM_CACHE(nvgpu_buddy, 0); | ||
1293 | if (!buddy_cache) { | ||
1294 | err = -ENOMEM; | ||
1295 | goto fail; | ||
1296 | } | ||
1297 | |||
1298 | a->alloced_buddies = RB_ROOT; | ||
1299 | a->fixed_allocs = RB_ROOT; | ||
1300 | INIT_LIST_HEAD(&a->co_list); | ||
1301 | err = balloc_init_lists(a); | ||
1302 | if (err) | ||
1303 | goto fail; | ||
1304 | |||
1305 | wmb(); | ||
1306 | a->initialized = 1; | ||
1307 | |||
1308 | nvgpu_init_alloc_debug(g, __a); | ||
1309 | alloc_dbg(__a, "New allocator: type buddy\n"); | ||
1310 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
1311 | alloc_dbg(__a, " size 0x%llx\n", a->length); | ||
1312 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
1313 | alloc_dbg(__a, " max_order %llu\n", a->max_order); | ||
1314 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
1315 | |||
1316 | return 0; | ||
1317 | |||
1318 | fail: | ||
1319 | kfree(a); | ||
1320 | return err; | ||
1321 | } | ||
1322 | |||
1323 | int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, | ||
1324 | const char *name, u64 base, u64 size, | ||
1325 | u64 blk_size, u64 flags) | ||
1326 | { | ||
1327 | return __nvgpu_buddy_allocator_init(g, a, NULL, name, | ||
1328 | base, size, blk_size, 0, flags); | ||
1329 | } | ||
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h new file mode 100644 index 00000000..50a11f14 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h | |||
@@ -0,0 +1,192 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef BUDDY_ALLOCATOR_PRIV_H | ||
18 | #define BUDDY_ALLOCATOR_PRIV_H | ||
19 | |||
20 | #include <linux/list.h> | ||
21 | #include <linux/rbtree.h> | ||
22 | |||
23 | struct nvgpu_allocator; | ||
24 | struct vm_gk20a; | ||
25 | |||
26 | /* | ||
27 | * Each buddy is an element in a binary tree. | ||
28 | */ | ||
29 | struct nvgpu_buddy { | ||
30 | struct nvgpu_buddy *parent; /* Parent node. */ | ||
31 | struct nvgpu_buddy *buddy; /* This node's buddy. */ | ||
32 | struct nvgpu_buddy *left; /* Lower address sub-node. */ | ||
33 | struct nvgpu_buddy *right; /* Higher address sub-node. */ | ||
34 | |||
35 | struct list_head buddy_entry; /* List entry for various lists. */ | ||
36 | struct rb_node alloced_entry; /* RB tree of allocations. */ | ||
37 | |||
38 | u64 start; /* Start address of this buddy. */ | ||
39 | u64 end; /* End address of this buddy. */ | ||
40 | u64 order; /* Buddy order. */ | ||
41 | |||
42 | #define BALLOC_BUDDY_ALLOCED 0x1 | ||
43 | #define BALLOC_BUDDY_SPLIT 0x2 | ||
44 | #define BALLOC_BUDDY_IN_LIST 0x4 | ||
45 | int flags; /* List of associated flags. */ | ||
46 | |||
47 | /* | ||
48 | * Size of the PDE this buddy is using. This allows for grouping like | ||
49 | * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a | ||
50 | * enum except for the BALLOC_PTE_SIZE_ANY specifier. | ||
51 | */ | ||
52 | #define BALLOC_PTE_SIZE_ANY -1 | ||
53 | int pte_size; | ||
54 | }; | ||
55 | |||
56 | #define __buddy_flag_ops(flag, flag_up) \ | ||
57 | static inline int buddy_is_ ## flag(struct nvgpu_buddy *b) \ | ||
58 | { \ | ||
59 | return b->flags & BALLOC_BUDDY_ ## flag_up; \ | ||
60 | } \ | ||
61 | static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \ | ||
62 | { \ | ||
63 | b->flags |= BALLOC_BUDDY_ ## flag_up; \ | ||
64 | } \ | ||
65 | static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \ | ||
66 | { \ | ||
67 | b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * int buddy_is_alloced(struct nvgpu_buddy *b); | ||
72 | * void buddy_set_alloced(struct nvgpu_buddy *b); | ||
73 | * void buddy_clr_alloced(struct nvgpu_buddy *b); | ||
74 | * | ||
75 | * int buddy_is_split(struct nvgpu_buddy *b); | ||
76 | * void buddy_set_split(struct nvgpu_buddy *b); | ||
77 | * void buddy_clr_split(struct nvgpu_buddy *b); | ||
78 | * | ||
79 | * int buddy_is_in_list(struct nvgpu_buddy *b); | ||
80 | * void buddy_set_in_list(struct nvgpu_buddy *b); | ||
81 | * void buddy_clr_in_list(struct nvgpu_buddy *b); | ||
82 | */ | ||
83 | __buddy_flag_ops(alloced, ALLOCED); | ||
84 | __buddy_flag_ops(split, SPLIT); | ||
85 | __buddy_flag_ops(in_list, IN_LIST); | ||
86 | |||
87 | /* | ||
88 | * Keeps info for a fixed allocation. | ||
89 | */ | ||
90 | struct nvgpu_fixed_alloc { | ||
91 | struct list_head buddies; /* List of buddies. */ | ||
92 | struct rb_node alloced_entry; /* RB tree of fixed allocations. */ | ||
93 | |||
94 | u64 start; /* Start of fixed block. */ | ||
95 | u64 end; /* End address. */ | ||
96 | }; | ||
97 | |||
98 | /* | ||
99 | * GPU buddy allocator for the various GPU address spaces. Each addressable unit | ||
100 | * doesn't have to correspond to a byte. In some cases each unit is a more | ||
101 | * complex object such as a comp_tag line or the like. | ||
102 | * | ||
103 | * The max order is computed based on the size of the minimum order and the size | ||
104 | * of the address space. | ||
105 | * | ||
106 | * order_size is the size of an order 0 buddy. | ||
107 | */ | ||
108 | struct nvgpu_buddy_allocator { | ||
109 | struct nvgpu_allocator *owner; /* Owner of this buddy allocator. */ | ||
110 | struct vm_gk20a *vm; /* Parent VM - can be NULL. */ | ||
111 | |||
112 | u64 base; /* Base address of the space. */ | ||
113 | u64 length; /* Length of the space. */ | ||
114 | u64 blk_size; /* Size of order 0 allocation. */ | ||
115 | u64 blk_shift; /* Shift to divide by blk_size. */ | ||
116 | |||
117 | /* Internal stuff. */ | ||
118 | u64 start; /* Real start (aligned to blk_size). */ | ||
119 | u64 end; /* Real end, trimmed if needed. */ | ||
120 | u64 count; /* Count of objects in space. */ | ||
121 | u64 blks; /* Count of blks in the space. */ | ||
122 | u64 max_order; /* Specific maximum order. */ | ||
123 | |||
124 | struct rb_root alloced_buddies; /* Outstanding allocations. */ | ||
125 | struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ | ||
126 | |||
127 | struct list_head co_list; | ||
128 | |||
129 | /* | ||
130 | * Impose an upper bound on the maximum order. | ||
131 | */ | ||
132 | #define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) | ||
133 | |||
134 | struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; | ||
135 | u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; | ||
136 | u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; | ||
137 | u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; | ||
138 | |||
139 | /* | ||
140 | * This is for when the allocator is managing a GVA space (the | ||
141 | * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires | ||
142 | * that we group like sized allocations into PDE blocks. | ||
143 | */ | ||
144 | u64 pte_blk_order; | ||
145 | |||
146 | int initialized; | ||
147 | int alloc_made; /* True after the first alloc. */ | ||
148 | |||
149 | u64 flags; | ||
150 | |||
151 | u64 bytes_alloced; | ||
152 | u64 bytes_alloced_real; | ||
153 | u64 bytes_freed; | ||
154 | }; | ||
155 | |||
156 | static inline struct nvgpu_buddy_allocator *buddy_allocator( | ||
157 | struct nvgpu_allocator *a) | ||
158 | { | ||
159 | return (struct nvgpu_buddy_allocator *)(a)->priv; | ||
160 | } | ||
161 | |||
162 | static inline struct list_head *balloc_get_order_list( | ||
163 | struct nvgpu_buddy_allocator *a, int order) | ||
164 | { | ||
165 | return &a->buddy_list[order]; | ||
166 | } | ||
167 | |||
168 | static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a, | ||
169 | int order) | ||
170 | { | ||
171 | return (1ULL << order) * a->blk_size; | ||
172 | } | ||
173 | |||
174 | static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a, | ||
175 | u64 base) | ||
176 | { | ||
177 | return base - a->start; | ||
178 | } | ||
179 | |||
180 | static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a, | ||
181 | u64 base) | ||
182 | { | ||
183 | return base + a->start; | ||
184 | } | ||
185 | |||
186 | static inline struct nvgpu_allocator *balloc_owner( | ||
187 | struct nvgpu_buddy_allocator *a) | ||
188 | { | ||
189 | return a->owner; | ||
190 | } | ||
191 | |||
192 | #endif | ||
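
As a worked example of the helpers above (with assumed values blk_size = 4K and start = 1M, which are not from this header), the order-to-length and base shift/unshift round trips look like this:

	/* Sketch, assuming a->blk_size == SZ_4K and a->start == SZ_1M. */
	static void balloc_math_example(struct nvgpu_buddy_allocator *a)
	{
		u64 len  = balloc_order_to_len(a, 3);		 /* 8 * 4K = 32K. */
		u64 rel  = balloc_base_shift(a, SZ_1M + SZ_64K); /* -> 64K into the space. */
		u64 addr = balloc_base_unshift(a, rel);		 /* -> SZ_1M + SZ_64K again. */

		(void)len;
		(void)addr;
	}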
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c new file mode 100644 index 00000000..e3063a42 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/atomic.h> | ||
21 | |||
22 | #include <nvgpu/allocator.h> | ||
23 | |||
24 | #include "lockless_allocator_priv.h" | ||
25 | |||
26 | static u64 nvgpu_lockless_alloc_length(struct nvgpu_allocator *a) | ||
27 | { | ||
28 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
29 | |||
30 | return pa->length; | ||
31 | } | ||
32 | |||
33 | static u64 nvgpu_lockless_alloc_base(struct nvgpu_allocator *a) | ||
34 | { | ||
35 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
36 | |||
37 | return pa->base; | ||
38 | } | ||
39 | |||
40 | static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a) | ||
41 | { | ||
42 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
43 | int inited = pa->inited; | ||
44 | |||
45 | rmb(); | ||
46 | return inited; | ||
47 | } | ||
48 | |||
49 | static u64 nvgpu_lockless_alloc_end(struct nvgpu_allocator *a) | ||
50 | { | ||
51 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
52 | |||
53 | return pa->base + pa->length; | ||
54 | } | ||
55 | |||
56 | static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len) | ||
57 | { | ||
58 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
59 | int head, new_head, ret; | ||
60 | u64 addr = 0; | ||
61 | |||
62 | if (len != pa->blk_size) | ||
63 | return 0; | ||
64 | |||
65 | head = ACCESS_ONCE(pa->head); | ||
66 | while (head >= 0) { | ||
67 | new_head = ACCESS_ONCE(pa->next[head]); | ||
68 | ret = cmpxchg(&pa->head, head, new_head); | ||
69 | if (ret == head) { | ||
70 | addr = pa->base + head * pa->blk_size; | ||
71 | atomic_inc(&pa->nr_allocs); | ||
72 | alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, | ||
73 | addr); | ||
74 | break; | ||
75 | } | ||
76 | head = ACCESS_ONCE(pa->head); | ||
77 | } | ||
78 | return addr; | ||
79 | } | ||
80 | |||
81 | static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr) | ||
82 | { | ||
83 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
84 | int head, ret; | ||
85 | u64 cur_idx, rem; | ||
86 | |||
87 | cur_idx = addr - pa->base; | ||
88 | rem = do_div(cur_idx, pa->blk_size); | ||
89 | |||
90 | while (1) { | ||
91 | head = ACCESS_ONCE(pa->head); | ||
92 | ACCESS_ONCE(pa->next[cur_idx]) = head; | ||
93 | ret = cmpxchg(&pa->head, head, cur_idx); | ||
94 | if (ret == head) { | ||
95 | atomic_dec(&pa->nr_allocs); | ||
96 | alloc_dbg(a, "Free node # %llu\n", cur_idx); | ||
97 | break; | ||
98 | } | ||
99 | } | ||
100 | } | ||
101 | |||
102 | static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a) | ||
103 | { | ||
104 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
105 | |||
106 | nvgpu_fini_alloc_debug(a); | ||
107 | |||
108 | vfree(pa->next); | ||
109 | kfree(pa); | ||
110 | } | ||
111 | |||
112 | static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a, | ||
113 | struct seq_file *s, int lock) | ||
114 | { | ||
115 | struct nvgpu_lockless_allocator *pa = a->priv; | ||
116 | |||
117 | __alloc_pstat(s, a, "Lockless allocator params:\n"); | ||
118 | __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); | ||
119 | __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); | ||
120 | |||
121 | /* Actual stats. */ | ||
122 | __alloc_pstat(s, a, "Stats:\n"); | ||
123 | __alloc_pstat(s, a, " Number allocs = %d\n", | ||
124 | atomic_read(&pa->nr_allocs)); | ||
125 | __alloc_pstat(s, a, " Number free = %d\n", | ||
126 | pa->nr_nodes - atomic_read(&pa->nr_allocs)); | ||
127 | } | ||
128 | |||
129 | static const struct nvgpu_allocator_ops pool_ops = { | ||
130 | .alloc = nvgpu_lockless_alloc, | ||
131 | .free = nvgpu_lockless_free, | ||
132 | |||
133 | .base = nvgpu_lockless_alloc_base, | ||
134 | .length = nvgpu_lockless_alloc_length, | ||
135 | .end = nvgpu_lockless_alloc_end, | ||
136 | .inited = nvgpu_lockless_alloc_inited, | ||
137 | |||
138 | .fini = nvgpu_lockless_alloc_destroy, | ||
139 | |||
140 | .print_stats = nvgpu_lockless_print_stats, | ||
141 | }; | ||
142 | |||
143 | int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | ||
144 | const char *name, u64 base, u64 length, | ||
145 | u64 blk_size, u64 flags) | ||
146 | { | ||
147 | int i; | ||
148 | int err; | ||
149 | int nr_nodes; | ||
150 | u64 count, rem; | ||
151 | struct nvgpu_lockless_allocator *a; | ||
152 | |||
153 | if (!blk_size) | ||
154 | return -EINVAL; | ||
155 | |||
156 | /* | ||
157 | * Ensure we have space for at least one node and that there's no | ||
158 | * overflow. To control memory footprint, we require count <= INT_MAX. | ||
159 | */ | ||
160 | count = length; | ||
161 | rem = do_div(count, blk_size); | ||
162 | if (!base || !count || count > INT_MAX) | ||
163 | return -EINVAL; | ||
164 | |||
165 | a = kzalloc(sizeof(struct nvgpu_lockless_allocator), GFP_KERNEL); | ||
166 | if (!a) | ||
167 | return -ENOMEM; | ||
168 | |||
169 | err = __nvgpu_alloc_common_init(__a, name, a, false, &pool_ops); | ||
170 | if (err) | ||
171 | goto fail; | ||
172 | |||
173 | a->next = vzalloc(sizeof(*a->next) * count); | ||
174 | if (!a->next) { | ||
175 | err = -ENOMEM; | ||
176 | goto fail; | ||
177 | } | ||
178 | |||
179 | /* chain the elements together to form the initial free list */ | ||
180 | nr_nodes = (int)count; | ||
181 | for (i = 0; i < nr_nodes; i++) | ||
182 | a->next[i] = i + 1; | ||
183 | a->next[nr_nodes - 1] = -1; | ||
184 | |||
185 | a->base = base; | ||
186 | a->length = length; | ||
187 | a->blk_size = blk_size; | ||
188 | a->nr_nodes = nr_nodes; | ||
189 | a->flags = flags; | ||
190 | atomic_set(&a->nr_allocs, 0); | ||
191 | |||
192 | wmb(); | ||
193 | a->inited = true; | ||
194 | |||
195 | nvgpu_init_alloc_debug(g, __a); | ||
196 | alloc_dbg(__a, "New allocator: type lockless\n"); | ||
197 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
198 | alloc_dbg(__a, " nodes %d\n", a->nr_nodes); | ||
199 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
200 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
201 | |||
202 | return 0; | ||
203 | |||
204 | fail: | ||
205 | kfree(a); | ||
206 | return err; | ||
207 | } | ||
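
A short usage sketch follows; the pool base, node size, and count are invented for the example:

	/* Sketch: a lockless pool of 128 fixed-size 1K nodes (made-up values). */
	static int lockless_example(struct gk20a *g, struct nvgpu_allocator *al,
				    u64 pool_base)
	{
		u64 node;
		int err;

		/* base must be non-zero so that 0 can signal allocation failure. */
		err = nvgpu_lockless_allocator_init(g, al, "example-pool",
						    pool_base, 128 * SZ_1K,
						    SZ_1K, 0);
		if (err)
			return err;

		/* Requests must be exactly blk_size; anything else returns 0. */
		node = nvgpu_alloc(al, SZ_1K);
		if (node)
			nvgpu_free(al, node);

		nvgpu_alloc_destroy(al);
		return 0;
	}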
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h new file mode 100644 index 00000000..32421ac1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Basics: | ||
19 | * | ||
20 | * - Lockless memory allocator for fixed-size structures, whose | ||
21 | * size is defined up front at init time. | ||
22 | * - Memory footprint scales linearly w/ the number of structures in | ||
23 | * the pool. It is ~= sizeof(int) * N. | ||
24 | * - Memory is pre-allocated by the client. The allocator itself | ||
25 | * only computes the addresses for allocations. | ||
26 | * - Limit of INT_MAX nodes that the allocator can be responsible for. | ||
27 | * | ||
28 | * Implementation details: | ||
29 | * | ||
30 | * The allocator maintains a single list of free nodes. We allocate & | ||
31 | * free nodes from the head of the list. We rely on the cmpxchg() operator | ||
32 | * to maintain atomicity on the head. | ||
33 | * | ||
34 | * So, both allocs & frees are O(1)!! | ||
35 | * | ||
36 | * -- Definitions -- | ||
37 | * Block Size - size of a single structure that this allocator will | ||
38 | * allocate. | ||
39 | * Node - one of the elements of size blk_size in the | ||
40 | * client-allocated buffer. | ||
41 | * Node Index - zero-based index of a node in the client-allocated | ||
42 | * contiguous buffer. | ||
43 | * | ||
44 | * -- Initial State -- | ||
45 | * We maintain the following to track the state of the free list: | ||
46 | * | ||
47 | * 1) A "head" index to track the index of the first free node in the list | ||
48 | * 2) A "next" array to track the index of the next free node in the list | ||
49 | * for every node. So next[head], will give the index to the 2nd free | ||
50 | * element in the list. | ||
51 | * | ||
52 | * So, to begin with, the free list consists of all node indices, and | ||
53 | * position N in the next array contains index N + 1: | ||
54 | * | ||
55 | * head = 0 | ||
56 | * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes | ||
57 | * free_list = 0->1->2->3->4->-1 | ||
58 | * | ||
59 | * -- Allocations -- | ||
60 | * 1) Read the current head (aka acq_head) | ||
61 | * 2) Read next[acq_head], to get the 2nd free element (aka new_head) | ||
62 | * 3) cmp_xchg(&head, acq_head, new_head) | ||
63 | * 4) If it succeeds, compute the address of the node, based on | ||
64 | * base address, blk_size, & acq_head. | ||
65 | * | ||
66 | * head = 1; | ||
67 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 | ||
68 | * free_list = 1->2->3->4->-1 | ||
69 | * | ||
70 | * head = 2; | ||
71 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 | ||
72 | * free_list = 2->3->4->-1 | ||
73 | * | ||
74 | * -- Frees -- | ||
75 | * 1) Based on the address to be freed, calculate the index of the node | ||
76 | * being freed (cur_idx) | ||
77 | * 2) Read the current head (old_head) | ||
78 | * 3) So the freed node is going to go at the head of the list, and we | ||
79 | * want to put the old_head after it. So next[cur_idx] = old_head | ||
80 | * 4) cmpxchg(head, old_head, cur_idx) | ||
81 | * | ||
82 | * head = 0 | ||
83 | * next = [2, 2, 3, 4, -1] | ||
84 | * free_list = 0->2->3->4->-1 : Example after freeing Node #0 | ||
85 | * | ||
86 | * head = 1 | ||
87 | * next = [2, 0, 3, 4, -1] | ||
88 | * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 | ||
89 | */ | ||
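
The push/pop scheme described above can be exercised outside the kernel. Below is a standalone, single-file sketch with GCC's __atomic builtins standing in for the kernel's cmpxchg(); it illustrates the algorithm only (it is not driver code, and like any cmpxchg-based stack sketch it leaves the classic ABA caveat aside):

	#include <stdio.h>

	#define NR_NODES 5

	static int next[NR_NODES];
	static int head;

	/* Pop the head of the free list; returns a node index or -1 when empty. */
	static int example_alloc(void)
	{
		int old = __atomic_load_n(&head, __ATOMIC_ACQUIRE);

		while (old >= 0) {
			int new_head = next[old];

			/* On failure, 'old' is refreshed with the current head. */
			if (__atomic_compare_exchange_n(&head, &old, new_head, 0,
							__ATOMIC_ACQ_REL,
							__ATOMIC_ACQUIRE))
				return old;
		}
		return -1;
	}

	/* Push a freed node back; it becomes the new head. */
	static void example_free(int idx)
	{
		int old = __atomic_load_n(&head, __ATOMIC_ACQUIRE);

		do {
			next[idx] = old;
		} while (!__atomic_compare_exchange_n(&head, &old, idx, 0,
						      __ATOMIC_ACQ_REL,
						      __ATOMIC_ACQUIRE));
	}

	int main(void)
	{
		int i, n;

		for (i = 0; i < NR_NODES; i++)
			next[i] = i + 1;
		next[NR_NODES - 1] = -1;
		head = 0;

		n = example_alloc();	/* -> 0; free list is now 1->2->3->4->-1. */
		example_free(n);	/* Free list is 0->1->2->3->4->-1 again. */
		printf("allocated and freed node %d\n", n);
		return 0;
	}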
90 | |||
91 | #ifndef LOCKLESS_ALLOCATOR_PRIV_H | ||
92 | #define LOCKLESS_ALLOCATOR_PRIV_H | ||
93 | |||
94 | struct nvgpu_allocator; | ||
95 | |||
96 | struct nvgpu_lockless_allocator { | ||
97 | struct nvgpu_allocator *owner; | ||
98 | |||
99 | u64 base; /* Base address of the space. */ | ||
100 | u64 length; /* Length of the space. */ | ||
101 | u64 blk_size; /* Size of the structure being allocated */ | ||
102 | int nr_nodes; /* Number of nodes available for allocation */ | ||
103 | |||
104 | int *next; /* An array holding the next indices per node */ | ||
105 | int head; /* Current node at the top of the stack */ | ||
106 | |||
107 | u64 flags; | ||
108 | |||
109 | bool inited; | ||
110 | |||
111 | /* Statistics */ | ||
112 | atomic_t nr_allocs; | ||
113 | }; | ||
114 | |||
115 | static inline struct nvgpu_lockless_allocator *lockless_allocator( | ||
116 | struct nvgpu_allocator *a) | ||
117 | { | ||
118 | return (struct nvgpu_lockless_allocator *)(a)->priv; | ||
119 | } | ||
120 | |||
121 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c new file mode 100644 index 00000000..ebd779c0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | |||
@@ -0,0 +1,212 @@ | |||
1 | /* | ||
2 | * gk20a allocator | ||
3 | * | ||
4 | * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | |||
22 | #include <nvgpu/allocator.h> | ||
23 | |||
24 | #include "gk20a/gk20a.h" | ||
25 | #include "gk20a/mm_gk20a.h" | ||
26 | #include "gk20a/platform_gk20a.h" | ||
27 | |||
28 | u32 nvgpu_alloc_tracing_on; | ||
29 | |||
30 | u64 nvgpu_alloc_length(struct nvgpu_allocator *a) | ||
31 | { | ||
32 | if (a->ops->length) | ||
33 | return a->ops->length(a); | ||
34 | |||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | u64 nvgpu_alloc_base(struct nvgpu_allocator *a) | ||
39 | { | ||
40 | if (a->ops->base) | ||
41 | return a->ops->base(a); | ||
42 | |||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | u64 nvgpu_alloc_initialized(struct nvgpu_allocator *a) | ||
47 | { | ||
48 | if (!a->ops || !a->ops->inited) | ||
49 | return 0; | ||
50 | |||
51 | return a->ops->inited(a); | ||
52 | } | ||
53 | |||
54 | u64 nvgpu_alloc_end(struct nvgpu_allocator *a) | ||
55 | { | ||
56 | if (a->ops->end) | ||
57 | return a->ops->end(a); | ||
58 | |||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | u64 nvgpu_alloc_space(struct nvgpu_allocator *a) | ||
63 | { | ||
64 | if (a->ops->space) | ||
65 | return a->ops->space(a); | ||
66 | |||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len) | ||
71 | { | ||
72 | return a->ops->alloc(a, len); | ||
73 | } | ||
74 | |||
75 | void nvgpu_free(struct nvgpu_allocator *a, u64 addr) | ||
76 | { | ||
77 | a->ops->free(a, addr); | ||
78 | } | ||
79 | |||
80 | u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len) | ||
81 | { | ||
82 | if (a->ops->alloc_fixed) | ||
83 | return a->ops->alloc_fixed(a, base, len); | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len) | ||
89 | { | ||
90 | /* | ||
91 | * If this operation is not defined for the allocator then just do | ||
92 | * nothing. The alternative would be to fall back on the regular | ||
93 | * free but that may be harmful in unexpected ways. | ||
94 | */ | ||
95 | if (a->ops->free_fixed) | ||
96 | a->ops->free_fixed(a, base, len); | ||
97 | } | ||
98 | |||
99 | int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, | ||
100 | struct nvgpu_alloc_carveout *co) | ||
101 | { | ||
102 | if (a->ops->reserve_carveout) | ||
103 | return a->ops->reserve_carveout(a, co); | ||
104 | |||
105 | return -ENODEV; | ||
106 | } | ||
107 | |||
108 | void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a, | ||
109 | struct nvgpu_alloc_carveout *co) | ||
110 | { | ||
111 | if (a->ops->release_carveout) | ||
112 | a->ops->release_carveout(a, co); | ||
113 | } | ||
114 | |||
115 | void nvgpu_alloc_destroy(struct nvgpu_allocator *a) | ||
116 | { | ||
117 | a->ops->fini(a); | ||
118 | memset(a, 0, sizeof(*a)); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Handle the common init stuff for an nvgpu_allocator. | ||
123 | */ | ||
124 | int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, | ||
125 | const char *name, void *priv, bool dbg, | ||
126 | const struct nvgpu_allocator_ops *ops) | ||
127 | { | ||
128 | if (!ops) | ||
129 | return -EINVAL; | ||
130 | |||
131 | /* | ||
132 | * This is the bare minimum operations required for a sensible | ||
133 | * allocator. | ||
134 | */ | ||
135 | if (!ops->alloc || !ops->free || !ops->fini) | ||
136 | return -EINVAL; | ||
137 | |||
138 | a->ops = ops; | ||
139 | a->priv = priv; | ||
140 | a->debug = dbg; | ||
141 | |||
142 | mutex_init(&a->lock); | ||
143 | |||
144 | strlcpy(a->name, name, sizeof(a->name)); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
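
To show what the "bare minimum" check above amounts to, here is a sketch of a backend supplying just the required ops; the backend itself is hypothetical and does nothing useful:

	/* Sketch: the three required ops for a hypothetical trivial backend. */
	static u64 trivial_alloc(struct nvgpu_allocator *a, u64 len)
	{
		/* ... backend-specific allocation ... */
		return 0;
	}

	static void trivial_free(struct nvgpu_allocator *a, u64 addr)
	{
		/* ... backend-specific free ... */
	}

	static void trivial_fini(struct nvgpu_allocator *a)
	{
		kfree(a->priv);
	}

	static const struct nvgpu_allocator_ops trivial_ops = {
		.alloc	= trivial_alloc,	/* Required. */
		.free	= trivial_free,		/* Required. */
		.fini	= trivial_fini,		/* Required; all other ops may be NULL. */
	};

	/* Wiring it up; fails with -EINVAL if any required op were missing:
	 *   err = __nvgpu_alloc_common_init(a, "trivial", priv, false, &trivial_ops);
	 */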
148 | |||
149 | void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a, | ||
150 | struct seq_file *s, int lock) | ||
151 | { | ||
152 | __a->ops->print_stats(__a, s, lock); | ||
153 | } | ||
154 | |||
155 | #ifdef CONFIG_DEBUG_FS | ||
156 | static int __alloc_show(struct seq_file *s, void *unused) | ||
157 | { | ||
158 | struct nvgpu_allocator *a = s->private; | ||
159 | |||
160 | nvgpu_alloc_print_stats(a, s, 1); | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static int __alloc_open(struct inode *inode, struct file *file) | ||
166 | { | ||
167 | return single_open(file, __alloc_show, inode->i_private); | ||
168 | } | ||
169 | |||
170 | static const struct file_operations __alloc_fops = { | ||
171 | .open = __alloc_open, | ||
172 | .read = seq_read, | ||
173 | .llseek = seq_lseek, | ||
174 | .release = single_release, | ||
175 | }; | ||
176 | #endif | ||
177 | |||
178 | void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) | ||
179 | { | ||
180 | #ifdef CONFIG_DEBUG_FS | ||
181 | if (!g->debugfs_allocators) | ||
182 | return; | ||
183 | |||
184 | a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, | ||
185 | g->debugfs_allocators, | ||
186 | a, &__alloc_fops); | ||
187 | #endif | ||
188 | } | ||
189 | |||
190 | void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) | ||
191 | { | ||
192 | #ifdef CONFIG_DEBUG_FS | ||
193 | if (!IS_ERR_OR_NULL(a->debugfs_entry)) | ||
194 | debugfs_remove(a->debugfs_entry); | ||
195 | #endif | ||
196 | } | ||
197 | |||
198 | void nvgpu_alloc_debugfs_init(struct device *dev) | ||
199 | { | ||
200 | #ifdef CONFIG_DEBUG_FS | ||
201 | struct gk20a_platform *platform = dev_get_drvdata(dev); | ||
202 | struct dentry *gpu_root = platform->debugfs; | ||
203 | struct gk20a *g = get_gk20a(dev); | ||
204 | |||
205 | g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root); | ||
206 | if (IS_ERR_OR_NULL(g->debugfs_allocators)) | ||
207 | return; | ||
208 | |||
209 | debugfs_create_u32("tracing", 0664, g->debugfs_allocators, | ||
210 | &nvgpu_alloc_tracing_on); | ||
211 | #endif | ||
212 | } | ||
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c new file mode 100644 index 00000000..c61b2238 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c | |||
@@ -0,0 +1,937 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/bitops.h> | ||
20 | #include <linux/mm.h> | ||
21 | |||
22 | #include <nvgpu/allocator.h> | ||
23 | #include <nvgpu/page_allocator.h> | ||
24 | |||
25 | #include "buddy_allocator_priv.h" | ||
26 | |||
27 | #define palloc_dbg(a, fmt, arg...) \ | ||
28 | alloc_dbg(palloc_owner(a), fmt, ##arg) | ||
29 | |||
30 | static struct kmem_cache *page_alloc_cache; | ||
31 | static struct kmem_cache *page_alloc_chunk_cache; | ||
32 | static struct kmem_cache *page_alloc_slab_page_cache; | ||
33 | static DEFINE_MUTEX(meta_data_cache_lock); | ||
34 | |||
35 | /* | ||
36 | * Handle the bookkeeping for moving slab pages between the empty, | ||
37 | * partial, and full lists. | ||
37 | */ | ||
38 | static inline void add_slab_page_to_empty(struct page_alloc_slab *slab, | ||
39 | struct page_alloc_slab_page *page) | ||
40 | { | ||
41 | BUG_ON(page->state != SP_NONE); | ||
42 | list_add(&page->list_entry, &slab->empty); | ||
43 | slab->nr_empty++; | ||
44 | page->state = SP_EMPTY; | ||
45 | } | ||
46 | static inline void add_slab_page_to_partial(struct page_alloc_slab *slab, | ||
47 | struct page_alloc_slab_page *page) | ||
48 | { | ||
49 | BUG_ON(page->state != SP_NONE); | ||
50 | list_add(&page->list_entry, &slab->partial); | ||
51 | slab->nr_partial++; | ||
52 | page->state = SP_PARTIAL; | ||
53 | } | ||
54 | static inline void add_slab_page_to_full(struct page_alloc_slab *slab, | ||
55 | struct page_alloc_slab_page *page) | ||
56 | { | ||
57 | BUG_ON(page->state != SP_NONE); | ||
58 | list_add(&page->list_entry, &slab->full); | ||
59 | slab->nr_full++; | ||
60 | page->state = SP_FULL; | ||
61 | } | ||
62 | |||
63 | static inline void del_slab_page_from_empty(struct page_alloc_slab *slab, | ||
64 | struct page_alloc_slab_page *page) | ||
65 | { | ||
66 | list_del_init(&page->list_entry); | ||
67 | slab->nr_empty--; | ||
68 | page->state = SP_NONE; | ||
69 | } | ||
70 | static inline void del_slab_page_from_partial(struct page_alloc_slab *slab, | ||
71 | struct page_alloc_slab_page *page) | ||
72 | { | ||
73 | list_del_init(&page->list_entry); | ||
74 | slab->nr_partial--; | ||
75 | page->state = SP_NONE; | ||
76 | } | ||
77 | static inline void del_slab_page_from_full(struct page_alloc_slab *slab, | ||
78 | struct page_alloc_slab_page *page) | ||
79 | { | ||
80 | list_del_init(&page->list_entry); | ||
81 | slab->nr_full--; | ||
82 | page->state = SP_NONE; | ||
83 | } | ||
84 | |||
85 | static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a) | ||
86 | { | ||
87 | struct nvgpu_page_allocator *va = a->priv; | ||
88 | |||
89 | return nvgpu_alloc_length(&va->source_allocator); | ||
90 | } | ||
91 | |||
92 | static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a) | ||
93 | { | ||
94 | struct nvgpu_page_allocator *va = a->priv; | ||
95 | |||
96 | return nvgpu_alloc_base(&va->source_allocator); | ||
97 | } | ||
98 | |||
99 | static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a) | ||
100 | { | ||
101 | struct nvgpu_page_allocator *va = a->priv; | ||
102 | |||
103 | return nvgpu_alloc_initialized(&va->source_allocator); | ||
104 | } | ||
105 | |||
106 | static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a) | ||
107 | { | ||
108 | struct nvgpu_page_allocator *va = a->priv; | ||
109 | |||
110 | return nvgpu_alloc_end(&va->source_allocator); | ||
111 | } | ||
112 | |||
113 | static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a) | ||
114 | { | ||
115 | struct nvgpu_page_allocator *va = a->priv; | ||
116 | |||
117 | return nvgpu_alloc_space(&va->source_allocator); | ||
118 | } | ||
119 | |||
120 | static int nvgpu_page_reserve_co(struct nvgpu_allocator *a, | ||
121 | struct nvgpu_alloc_carveout *co) | ||
122 | { | ||
123 | struct nvgpu_page_allocator *va = a->priv; | ||
124 | |||
125 | return nvgpu_alloc_reserve_carveout(&va->source_allocator, co); | ||
126 | } | ||
127 | |||
128 | static void nvgpu_page_release_co(struct nvgpu_allocator *a, | ||
129 | struct nvgpu_alloc_carveout *co) | ||
130 | { | ||
131 | struct nvgpu_page_allocator *va = a->priv; | ||
132 | |||
133 | nvgpu_alloc_release_carveout(&va->source_allocator, co); | ||
134 | } | ||
135 | |||
136 | static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, | ||
137 | struct nvgpu_page_alloc *alloc, | ||
138 | bool free_buddy_alloc) | ||
139 | { | ||
140 | struct page_alloc_chunk *chunk; | ||
141 | |||
142 | while (!list_empty(&alloc->alloc_chunks)) { | ||
143 | chunk = list_first_entry(&alloc->alloc_chunks, | ||
144 | struct page_alloc_chunk, | ||
145 | list_entry); | ||
146 | list_del(&chunk->list_entry); | ||
147 | |||
148 | if (free_buddy_alloc) | ||
149 | nvgpu_free(&a->source_allocator, chunk->base); | ||
150 | kfree(chunk); | ||
151 | } | ||
152 | |||
153 | kfree(alloc); | ||
154 | } | ||
155 | |||
156 | static int __insert_page_alloc(struct nvgpu_page_allocator *a, | ||
157 | struct nvgpu_page_alloc *alloc) | ||
158 | { | ||
159 | struct rb_node **new = &a->allocs.rb_node; | ||
160 | struct rb_node *parent = NULL; | ||
161 | |||
162 | while (*new) { | ||
163 | struct nvgpu_page_alloc *tmp = | ||
164 | container_of(*new, struct nvgpu_page_alloc, | ||
165 | tree_entry); | ||
166 | |||
167 | parent = *new; | ||
168 | if (alloc->base < tmp->base) { | ||
169 | new = &((*new)->rb_left); | ||
170 | } else if (alloc->base > tmp->base) { | ||
171 | new = &((*new)->rb_right); | ||
172 | } else { | ||
173 | WARN(1, "Duplicate entries in allocated list!\n"); | ||
174 | return 0; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | rb_link_node(&alloc->tree_entry, parent, new); | ||
179 | rb_insert_color(&alloc->tree_entry, &a->allocs); | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static struct nvgpu_page_alloc *__find_page_alloc( | ||
185 | struct nvgpu_page_allocator *a, | ||
186 | u64 addr) | ||
187 | { | ||
188 | struct rb_node *node = a->allocs.rb_node; | ||
189 | struct nvgpu_page_alloc *alloc; | ||
190 | |||
191 | while (node) { | ||
192 | alloc = container_of(node, struct nvgpu_page_alloc, tree_entry); | ||
193 | |||
194 | if (addr < alloc->base) | ||
195 | node = node->rb_left; | ||
196 | else if (addr > alloc->base) | ||
197 | node = node->rb_right; | ||
198 | else | ||
199 | break; | ||
200 | } | ||
201 | |||
202 | if (!node) | ||
203 | return NULL; | ||
204 | |||
205 | rb_erase(node, &a->allocs); | ||
206 | |||
207 | return alloc; | ||
208 | } | ||
209 | |||
210 | static struct page_alloc_slab_page *alloc_slab_page( | ||
211 | struct nvgpu_page_allocator *a, | ||
212 | struct page_alloc_slab *slab) | ||
213 | { | ||
214 | struct page_alloc_slab_page *slab_page; | ||
215 | |||
216 | slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL); | ||
217 | if (!slab_page) { | ||
218 | palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n"); | ||
219 | return ERR_PTR(-ENOMEM); | ||
220 | } | ||
221 | |||
222 | memset(slab_page, 0, sizeof(*slab_page)); | ||
223 | |||
224 | slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size); | ||
225 | if (!slab_page->page_addr) { | ||
226 | kfree(slab_page); | ||
227 | palloc_dbg(a, "OOM: vidmem is full!\n"); | ||
228 | return ERR_PTR(-ENOMEM); | ||
229 | } | ||
230 | |||
231 | INIT_LIST_HEAD(&slab_page->list_entry); | ||
232 | slab_page->slab_size = slab->slab_size; | ||
233 | slab_page->nr_objects = (u32)a->page_size / slab->slab_size; | ||
234 | slab_page->nr_objects_alloced = 0; | ||
235 | slab_page->owner = slab; | ||
236 | slab_page->state = SP_NONE; | ||
237 | |||
238 | a->pages_alloced++; | ||
239 | |||
240 | palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n", | ||
241 | slab_page->page_addr, slab_page->slab_size); | ||
242 | |||
243 | return slab_page; | ||
244 | } | ||
245 | |||
246 | static void free_slab_page(struct nvgpu_page_allocator *a, | ||
247 | struct page_alloc_slab_page *slab_page) | ||
248 | { | ||
249 | palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr); | ||
250 | |||
251 | BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) || | ||
252 | slab_page->nr_objects_alloced != 0 || | ||
253 | slab_page->bitmap != 0); | ||
254 | |||
255 | nvgpu_free(&a->source_allocator, slab_page->page_addr); | ||
256 | a->pages_freed++; | ||
257 | |||
258 | kmem_cache_free(page_alloc_slab_page_cache, slab_page); | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * This expects @alloc to have 1 empty page_alloc_chunk already added to the | ||
263 | * alloc_chunks list. | ||
264 | */ | ||
265 | static int __do_slab_alloc(struct nvgpu_page_allocator *a, | ||
266 | struct page_alloc_slab *slab, | ||
267 | struct nvgpu_page_alloc *alloc) | ||
268 | { | ||
269 | struct page_alloc_slab_page *slab_page = NULL; | ||
270 | struct page_alloc_chunk *chunk; | ||
271 | unsigned long offs; | ||
272 | |||
273 | /* | ||
274 | * Check the partial and empty lists to see if we have some space | ||
275 | * readily available. Take the slab_page out of what ever list it | ||
276 | * was in since it may be put back into a different list later. | ||
277 | */ | ||
278 | if (!list_empty(&slab->partial)) { | ||
279 | slab_page = list_first_entry(&slab->partial, | ||
280 | struct page_alloc_slab_page, | ||
281 | list_entry); | ||
282 | del_slab_page_from_partial(slab, slab_page); | ||
283 | } else if (!list_empty(&slab->empty)) { | ||
284 | slab_page = list_first_entry(&slab->empty, | ||
285 | struct page_alloc_slab_page, | ||
286 | list_entry); | ||
287 | del_slab_page_from_empty(slab, slab_page); | ||
288 | } | ||
289 | |||
290 | if (!slab_page) { | ||
291 | slab_page = alloc_slab_page(a, slab); | ||
292 | if (IS_ERR(slab_page)) | ||
293 | return PTR_ERR(slab_page); | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * We now have a slab_page. Do the alloc. | ||
298 | */ | ||
299 | offs = bitmap_find_next_zero_area(&slab_page->bitmap, | ||
300 | slab_page->nr_objects, | ||
301 | 0, 1, 0); | ||
302 | if (offs >= slab_page->nr_objects) { | ||
303 | WARN(1, "Empty/partial slab with no free objects?"); | ||
304 | |||
305 | /* Add the buggy page to the full list... This isn't ideal. */ | ||
306 | add_slab_page_to_full(slab, slab_page); | ||
307 | return -ENOMEM; | ||
308 | } | ||
309 | |||
310 | bitmap_set(&slab_page->bitmap, offs, 1); | ||
311 | slab_page->nr_objects_alloced++; | ||
312 | |||
313 | if (slab_page->nr_objects_alloced < slab_page->nr_objects) | ||
314 | add_slab_page_to_partial(slab, slab_page); | ||
315 | else if (slab_page->nr_objects_alloced == slab_page->nr_objects) | ||
316 | add_slab_page_to_full(slab, slab_page); | ||
317 | else | ||
318 | BUG(); /* Should be impossible to hit this. */ | ||
319 | |||
320 | /* | ||
321 | * Handle building the nvgpu_page_alloc struct. We expect one | ||
322 | * page_alloc_chunk to be present. | ||
323 | */ | ||
324 | alloc->slab_page = slab_page; | ||
325 | alloc->nr_chunks = 1; | ||
326 | alloc->length = slab_page->slab_size; | ||
327 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); | ||
328 | |||
329 | chunk = list_first_entry(&alloc->alloc_chunks, | ||
330 | struct page_alloc_chunk, list_entry); | ||
331 | chunk->base = alloc->base; | ||
332 | chunk->length = alloc->length; | ||
333 | |||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * Allocate from a slab instead of directly from the page allocator. | ||
339 | */ | ||
340 | static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | ||
341 | struct nvgpu_page_allocator *a, u64 len) | ||
342 | { | ||
343 | int err, slab_nr; | ||
344 | struct page_alloc_slab *slab; | ||
345 | struct nvgpu_page_alloc *alloc = NULL; | ||
346 | struct page_alloc_chunk *chunk = NULL; | ||
347 | |||
348 | /* | ||
349 | * Align the length to a page and then divide by the page size (4k for | ||
350 | * this code). ilog2() of that then gets us the correct slab to use. | ||
351 | */ | ||
352 | slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); | ||
353 | slab = &a->slabs[slab_nr]; | ||
354 | |||
355 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
356 | if (!alloc) { | ||
357 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); | ||
358 | goto fail; | ||
359 | } | ||
360 | chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
361 | if (!chunk) { | ||
362 | palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); | ||
363 | goto fail; | ||
364 | } | ||
365 | |||
366 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
367 | list_add(&chunk->list_entry, &alloc->alloc_chunks); | ||
368 | |||
369 | err = __do_slab_alloc(a, slab, alloc); | ||
370 | if (err) | ||
371 | goto fail; | ||
372 | |||
373 | palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n", | ||
374 | len, slab_nr, alloc->base); | ||
375 | a->nr_slab_allocs++; | ||
376 | |||
377 | return alloc; | ||
378 | |||
379 | fail: | ||
380 | kfree(alloc); | ||
381 | kfree(chunk); | ||
382 | return NULL; | ||
383 | } | ||
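
A few worked examples of the slab selection arithmetic above, assuming slab i holds objects of size 4K << i (an assumption about the allocator's slab init, which is not shown here):

	/*
	 * Worked examples for slab_nr = ilog2(PAGE_ALIGN(len) >> 12):
	 *
	 *   len = 1K  -> PAGE_ALIGN = 4K  -> 4K  >> 12 = 1 -> ilog2 = 0 -> 4K slab
	 *   len = 6K  -> PAGE_ALIGN = 8K  -> 8K  >> 12 = 2 -> ilog2 = 1 -> 8K slab
	 *   len = 32K -> PAGE_ALIGN = 32K -> 32K >> 12 = 8 -> ilog2 = 3 -> 32K slab
	 */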
384 | |||
385 | static void __nvgpu_free_slab(struct nvgpu_page_allocator *a, | ||
386 | struct nvgpu_page_alloc *alloc) | ||
387 | { | ||
388 | struct page_alloc_slab_page *slab_page = alloc->slab_page; | ||
389 | struct page_alloc_slab *slab = slab_page->owner; | ||
390 | enum slab_page_state new_state; | ||
391 | int offs; | ||
392 | |||
393 | offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size; | ||
394 | bitmap_clear(&slab_page->bitmap, offs, 1); | ||
395 | |||
396 | slab_page->nr_objects_alloced--; | ||
397 | |||
398 | if (slab_page->nr_objects_alloced == 0) | ||
399 | new_state = SP_EMPTY; | ||
400 | else | ||
401 | new_state = SP_PARTIAL; | ||
402 | |||
403 | /* | ||
404 | * Need to migrate the page to a different list. | ||
405 | */ | ||
406 | if (new_state != slab_page->state) { | ||
407 | /* Delete - can't be in empty. */ | ||
408 | if (slab_page->state == SP_PARTIAL) | ||
409 | del_slab_page_from_partial(slab, slab_page); | ||
410 | else | ||
411 | del_slab_page_from_full(slab, slab_page); | ||
412 | |||
413 | /* And add. */ | ||
414 | if (new_state == SP_EMPTY) { | ||
415 | if (list_empty(&slab->empty)) | ||
416 | add_slab_page_to_empty(slab, slab_page); | ||
417 | else | ||
418 | free_slab_page(a, slab_page); | ||
419 | } else { | ||
420 | add_slab_page_to_partial(slab, slab_page); | ||
421 | } | ||
422 | } | ||
423 | |||
424 | /* | ||
425 | * Now handle the page_alloc. | ||
426 | */ | ||
427 | __nvgpu_free_pages(a, alloc, false); | ||
428 | a->nr_slab_frees++; | ||
429 | |||
430 | return; | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * Allocate physical pages. Since the underlying allocator is a buddy | ||
435 | * allocator, any single allocation it returns is contiguous. However, | ||
436 | * since the space can be fragmented, this allocator will collate multiple | ||
437 | * smaller, mutually non-contiguous allocations together if necessary. | ||
438 | */ | ||
439 | static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | ||
440 | struct nvgpu_page_allocator *a, u64 pages) | ||
441 | { | ||
442 | struct nvgpu_page_alloc *alloc; | ||
443 | struct page_alloc_chunk *c; | ||
444 | u64 max_chunk_len = pages << a->page_shift; | ||
445 | int i = 0; | ||
446 | |||
447 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
448 | if (!alloc) | ||
449 | goto fail; | ||
450 | |||
451 | memset(alloc, 0, sizeof(*alloc)); | ||
452 | |||
453 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
454 | alloc->length = pages << a->page_shift; | ||
455 | |||
456 | while (pages) { | ||
457 | u64 chunk_addr = 0; | ||
458 | u64 chunk_pages = (u64)1 << __fls(pages); | ||
459 | u64 chunk_len = chunk_pages << a->page_shift; | ||
460 | |||
461 | /* | ||
462 | * Take care of the possibility that the allocation must be | ||
463 | * contiguous. If this is not the first iteration then that | ||
464 | * means the first iteration failed to alloc the entire | ||
465 | * requested size. The buddy allocator guarantees any given | ||
466 | * single alloc is contiguous. | ||
467 | */ | ||
468 | if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0) | ||
469 | goto fail_cleanup; | ||
470 | |||
471 | if (chunk_len > max_chunk_len) | ||
472 | chunk_len = max_chunk_len; | ||
473 | |||
474 | /* | ||
475 | * Keep attempting to allocate in smaller chunks until the alloc | ||
476 | * either succeeds or is smaller than the page_size of the | ||
477 | * allocator (i.e the allocator is OOM). | ||
478 | */ | ||
479 | do { | ||
480 | chunk_addr = nvgpu_alloc(&a->source_allocator, | ||
481 | chunk_len); | ||
482 | |||
483 | /* Divide by 2 and try again */ | ||
484 | if (!chunk_addr) { | ||
485 | palloc_dbg(a, "balloc failed: 0x%llx\n", | ||
486 | chunk_len); | ||
487 | chunk_len >>= 1; | ||
488 | max_chunk_len = chunk_len; | ||
489 | } | ||
490 | } while (!chunk_addr && chunk_len >= a->page_size); | ||
491 | |||
492 | chunk_pages = chunk_len >> a->page_shift; | ||
493 | |||
494 | if (!chunk_addr) { | ||
495 | palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len); | ||
496 | goto fail_cleanup; | ||
497 | } | ||
498 | |||
499 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
500 | if (!c) { | ||
501 | nvgpu_free(&a->source_allocator, chunk_addr); | ||
502 | goto fail_cleanup; | ||
503 | } | ||
504 | |||
505 | pages -= chunk_pages; | ||
506 | |||
507 | c->base = chunk_addr; | ||
508 | c->length = chunk_len; | ||
509 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
510 | |||
511 | i++; | ||
512 | } | ||
513 | |||
514 | alloc->nr_chunks = i; | ||
515 | c = list_first_entry(&alloc->alloc_chunks, | ||
516 | struct page_alloc_chunk, list_entry); | ||
517 | alloc->base = c->base; | ||
518 | |||
519 | return alloc; | ||
520 | |||
521 | fail_cleanup: | ||
522 | while (!list_empty(&alloc->alloc_chunks)) { | ||
523 | c = list_first_entry(&alloc->alloc_chunks, | ||
524 | struct page_alloc_chunk, list_entry); | ||
525 | list_del(&c->list_entry); | ||
526 | nvgpu_free(&a->source_allocator, c->base); | ||
527 | kfree(c); | ||
528 | } | ||
529 | kfree(alloc); | ||
530 | fail: | ||
531 | return ERR_PTR(-ENOMEM); | ||
532 | } | ||
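
The greedy power-of-two splitting in the loop above can be seen in isolation with a standalone sketch (not driver code); for a 13-page request with no allocation failures it produces chunks of 8, 4, and 1 pages:

	#include <stdio.h>
	#include <stdint.h>

	/* Index of the highest set bit, like the kernel's __fls() for v != 0. */
	static unsigned int fls_idx(uint64_t v)
	{
		unsigned int i = 0;

		while (v >>= 1)
			i++;
		return i;
	}

	int main(void)
	{
		uint64_t pages = 13;

		while (pages) {
			uint64_t chunk_pages = 1ULL << fls_idx(pages);

			printf("chunk of %llu pages\n",
			       (unsigned long long)chunk_pages);
			pages -= chunk_pages;	/* 13 -> 5 -> 1 -> 0. */
		}
		return 0;
	}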
533 | |||
534 | static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | ||
535 | struct nvgpu_page_allocator *a, u64 len) | ||
536 | { | ||
537 | struct nvgpu_page_alloc *alloc = NULL; | ||
538 | struct page_alloc_chunk *c; | ||
539 | u64 pages; | ||
540 | int i = 0; | ||
541 | |||
542 | pages = ALIGN(len, a->page_size) >> a->page_shift; | ||
543 | |||
544 | alloc = __do_nvgpu_alloc_pages(a, pages); | ||
545 | if (IS_ERR(alloc)) { | ||
546 | palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n", | ||
547 | pages << a->page_shift, pages); | ||
548 | return NULL; | ||
549 | } | ||
550 | |||
551 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | ||
552 | pages << a->page_shift, pages, alloc->base); | ||
553 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
554 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
555 | i++, c->base, c->length); | ||
556 | } | ||
557 | |||
558 | return alloc; | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * Allocate enough pages to satisfy @len. Page size is determined at | ||
563 | * initialization of the allocator. | ||
564 | * | ||
565 | * The returned u64 is actually a struct nvgpu_page_alloc pointer in | ||
566 | * disguise. It doesn't make much sense to return the address of the first | ||
567 | * page in the list of pages (since they could be discontiguous). This has | ||
568 | * precedent in the dma_alloc APIs, though really it's just an annoying | ||
569 | * artifact of the nvgpu_alloc() API requiring a u64 return type. | ||
570 | */ | ||
571 | static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len) | ||
572 | { | ||
573 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
574 | struct nvgpu_page_alloc *alloc = NULL; | ||
575 | u64 real_len; | ||
576 | |||
577 | /* | ||
578 | * If we want contig pages we have to round up to a power of two. It's | ||
579 | * easier to do that here than in the buddy allocator. | ||
580 | */ | ||
581 | real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? | ||
582 | roundup_pow_of_two(len) : len; | ||
583 | |||
584 | alloc_lock(__a); | ||
585 | if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES && | ||
586 | real_len <= (a->page_size / 2)) | ||
587 | alloc = __nvgpu_alloc_slab(a, real_len); | ||
588 | else | ||
589 | alloc = __nvgpu_alloc_pages(a, real_len); | ||
590 | |||
591 | if (!alloc) { | ||
592 | alloc_unlock(__a); | ||
593 | return 0; | ||
594 | } | ||
595 | |||
596 | __insert_page_alloc(a, alloc); | ||
597 | |||
598 | a->nr_allocs++; | ||
599 | if (real_len > a->page_size / 2) | ||
600 | a->pages_alloced += alloc->length >> a->page_shift; | ||
601 | alloc_unlock(__a); | ||
602 | |||
603 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
604 | return alloc->base; | ||
605 | else | ||
606 | return (u64) (uintptr_t) alloc; | ||
607 | } | ||
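
From a caller's side, the returned handle is consumed like this when scatter-gather is in effect. The caller below is hypothetical, but the struct fields are the ones used throughout this file:

	/* Sketch: walking the chunks behind a nvgpu_page_alloc() handle when
	 * GPU_ALLOC_NO_SCATTER_GATHER is NOT set (hypothetical caller). */
	static void walk_chunks(u64 handle)
	{
		struct nvgpu_page_alloc *alloc =
			(struct nvgpu_page_alloc *)(uintptr_t)handle;
		struct page_alloc_chunk *c;

		list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
			/* Each chunk is contiguous; the set of chunks may not be. */
			pr_info("chunk: 0x%010llx + 0x%llx\n", c->base, c->length);
		}
	}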
608 | |||
609 | /* | ||
610 | * Note: this will remove the nvgpu_page_alloc struct from the RB tree | ||
611 | * if it's found. | ||
612 | */ | ||
613 | static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base) | ||
614 | { | ||
615 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
616 | struct nvgpu_page_alloc *alloc; | ||
617 | |||
618 | alloc_lock(__a); | ||
619 | |||
620 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
621 | alloc = __find_page_alloc(a, base); | ||
622 | else | ||
623 | alloc = __find_page_alloc(a, | ||
624 | ((struct nvgpu_page_alloc *)(uintptr_t)base)->base); | ||
625 | |||
626 | if (!alloc) { | ||
627 | palloc_dbg(a, "Hrm, found no alloc?\n"); | ||
628 | goto done; | ||
629 | } | ||
630 | |||
631 | a->nr_frees++; | ||
632 | |||
633 | palloc_dbg(a, "Free 0x%llx id=0x%010llx\n", | ||
634 | alloc->length, alloc->base); | ||
635 | |||
636 | /* | ||
637 | * Both paths below free *alloc, so it must not be touched afterwards. | ||
638 | */ | ||
639 | if (alloc->slab_page) { | ||
640 | __nvgpu_free_slab(a, alloc); | ||
641 | } else { | ||
642 | a->pages_freed += (alloc->length >> a->page_shift); | ||
643 | __nvgpu_free_pages(a, alloc, true); | ||
644 | } | ||
645 | |||
646 | done: | ||
647 | alloc_unlock(__a); | ||
648 | } | ||
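
/*
 * The free side mirrors the alloc side: under GPU_ALLOC_NO_SCATTER_GATHER the
 * caller passes the physical base back; otherwise "base" is the u64 handle
 * returned by nvgpu_page_alloc(). Continuing the sketch above:
 *
 *	nvgpu_free(&vidmem, handle);
 */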
649 | |||
650 | static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | ||
651 | struct nvgpu_page_allocator *a, u64 base, u64 length) | ||
652 | { | ||
653 | struct nvgpu_page_alloc *alloc; | ||
654 | struct page_alloc_chunk *c; | ||
655 | |||
656 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
657 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
658 | if (!alloc || !c) | ||
659 | goto fail; | ||
660 | |||
661 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length); | ||
662 | if (!alloc->base) { | ||
663 | WARN(1, "nvgpu: fixed page alloc failed @ 0x%010llx", base); | ||
664 | goto fail; | ||
665 | } | ||
666 | |||
667 | alloc->nr_chunks = 1; | ||
668 | alloc->length = length; | ||
669 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
670 | |||
671 | c->base = alloc->base; | ||
672 | c->length = length; | ||
673 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
674 | |||
675 | return alloc; | ||
676 | |||
677 | fail: | ||
678 | kfree(c); | ||
679 | kfree(alloc); | ||
680 | return ERR_PTR(-ENOMEM); | ||
681 | } | ||
682 | |||
683 | static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | ||
684 | u64 base, u64 len) | ||
685 | { | ||
686 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
687 | struct nvgpu_page_alloc *alloc = NULL; | ||
688 | struct page_alloc_chunk *c; | ||
689 | u64 aligned_len, pages; | ||
690 | int i = 0; | ||
691 | |||
692 | aligned_len = ALIGN(len, a->page_size); | ||
693 | pages = aligned_len >> a->page_shift; | ||
694 | |||
695 | alloc_lock(__a); | ||
696 | |||
697 | alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len); | ||
698 | if (IS_ERR(alloc)) { | ||
699 | alloc_unlock(__a); | ||
700 | return 0; | ||
701 | } | ||
702 | |||
703 | __insert_page_alloc(a, alloc); | ||
704 | a->nr_fixed_allocs++; | ||
705 | a->pages_alloced += pages; | ||
706 | alloc_unlock(__a); | ||
707 | |||
708 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | ||
709 | alloc->base, aligned_len, pages); | ||
710 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
711 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
712 | i++, c->base, c->length); | ||
713 | } | ||
715 | |||
716 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
717 | return alloc->base; | ||
718 | else | ||
719 | return (u64) (uintptr_t) alloc; | ||
720 | } | ||
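
/*
 * Example (sketch): pinning 128k at a fixed offset inside the managed range,
 * where "vidmem" is the hypothetical allocator from the sketch above and
 * 0x100000 a caller-chosen, block-aligned base:
 *
 *	u64 handle = nvgpu_alloc_fixed(&vidmem, 0x100000, SZ_128K);
 *	if (!handle)
 *		return -ENOMEM;
 *	...
 *	nvgpu_free_fixed(&vidmem, handle, SZ_128K);
 */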
721 | |||
722 | static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a, | ||
723 | u64 base, u64 len) | ||
724 | { | ||
725 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
726 | struct nvgpu_page_alloc *alloc; | ||
727 | |||
728 | alloc_lock(__a); | ||
729 | |||
730 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
731 | alloc = __find_page_alloc(a, base); | ||
732 | else | ||
733 | alloc = __find_page_alloc(a, | ||
734 | ((struct nvgpu_page_alloc *)(uintptr_t)base)->base); | ||
735 | if (!alloc) | ||
736 | goto done; | ||
737 | |||
738 | palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", | ||
739 | alloc->base, alloc->length); | ||
740 | |||
741 | a->nr_fixed_frees++; | ||
742 | a->pages_freed += (alloc->length >> a->page_shift); | ||
743 | |||
744 | /* | ||
745 | * This works for the time being since the buddy allocator | ||
746 | * uses the same free function for both fixed and regular | ||
747 | * allocs. This would have to be updated if the underlying | ||
748 | * allocator were to change. | ||
749 | */ | ||
750 | __nvgpu_free_pages(a, alloc, true); | ||
751 | |||
752 | done: | ||
753 | alloc_unlock(__a); | ||
754 | } | ||
755 | |||
756 | static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a) | ||
757 | { | ||
758 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
759 | alloc_lock(__a); | ||
760 | kfree(a->slabs); | ||
761 | kfree(a); | ||
762 | __a->priv = NULL; | ||
763 | alloc_unlock(__a); | ||
764 | } | ||
765 | |||
766 | static void nvgpu_page_print_stats(struct nvgpu_allocator *__a, | ||
767 | struct seq_file *s, int lock) | ||
768 | { | ||
769 | struct nvgpu_page_allocator *a = page_allocator(__a); | ||
770 | int i; | ||
771 | |||
772 | if (lock) | ||
773 | alloc_lock(__a); | ||
774 | |||
775 | __alloc_pstat(s, __a, "Page allocator:\n"); | ||
776 | __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs); | ||
777 | __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); | ||
778 | __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); | ||
779 | __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); | ||
780 | __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs); | ||
781 | __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees); | ||
782 | __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); | ||
783 | __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); | ||
784 | __alloc_pstat(s, __a, "\n"); | ||
785 | |||
786 | /* | ||
787 | * Slab info. | ||
788 | */ | ||
789 | if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) { | ||
790 | __alloc_pstat(s, __a, "Slabs:\n"); | ||
791 | __alloc_pstat(s, __a, " size empty partial full\n"); | ||
792 | __alloc_pstat(s, __a, " ---- ----- ------- ----\n"); | ||
793 | |||
794 | for (i = 0; i < a->nr_slabs; i++) { | ||
795 | struct page_alloc_slab *slab = &a->slabs[i]; | ||
796 | |||
797 | __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n", | ||
798 | slab->slab_size, | ||
799 | slab->nr_empty, slab->nr_partial, | ||
800 | slab->nr_full); | ||
801 | } | ||
802 | __alloc_pstat(s, __a, "\n"); | ||
803 | } | ||
804 | |||
805 | __alloc_pstat(s, __a, "Source alloc: %s\n", | ||
806 | a->source_allocator.name); | ||
807 | nvgpu_alloc_print_stats(&a->source_allocator, s, lock); | ||
808 | |||
809 | if (lock) | ||
810 | alloc_unlock(__a); | ||
811 | } | ||
812 | |||
813 | static const struct nvgpu_allocator_ops page_ops = { | ||
814 | .alloc = nvgpu_page_alloc, | ||
815 | .free = nvgpu_page_free, | ||
816 | |||
817 | .alloc_fixed = nvgpu_page_alloc_fixed, | ||
818 | .free_fixed = nvgpu_page_free_fixed, | ||
819 | |||
820 | .reserve_carveout = nvgpu_page_reserve_co, | ||
821 | .release_carveout = nvgpu_page_release_co, | ||
822 | |||
823 | .base = nvgpu_page_alloc_base, | ||
824 | .length = nvgpu_page_alloc_length, | ||
825 | .end = nvgpu_page_alloc_end, | ||
826 | .inited = nvgpu_page_alloc_inited, | ||
827 | .space = nvgpu_page_alloc_space, | ||
828 | |||
829 | .fini = nvgpu_page_allocator_destroy, | ||
830 | |||
831 | .print_stats = nvgpu_page_print_stats, | ||
832 | }; | ||
833 | |||
834 | /* | ||
835 | * nr_slabs is computed as follows: divide page_size by 4096 to get number of | ||
836 | * 4k pages in page_size. Then take the base 2 log of that to get number of | ||
837 | * slabs. For a 64k page_size that works out like: | ||
838 | * | ||
839 | * (64 * 1024) / (4 * 1024) = 16 | ||
840 | * ilog2(16) = 4 | ||
841 | * | ||
842 | * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k). | ||
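* (A 128k page_size would likewise give ilog2(32) = 5 slabs: 4k through 64k.)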
843 | */ | ||
844 | static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a) | ||
845 | { | ||
846 | size_t nr_slabs = ilog2(a->page_size >> 12); | ||
847 | unsigned int i; | ||
848 | |||
849 | a->slabs = kcalloc(nr_slabs, | ||
850 | sizeof(struct page_alloc_slab), | ||
851 | GFP_KERNEL); | ||
852 | if (!a->slabs) | ||
853 | return -ENOMEM; | ||
854 | a->nr_slabs = nr_slabs; | ||
855 | |||
856 | for (i = 0; i < nr_slabs; i++) { | ||
857 | struct page_alloc_slab *slab = &a->slabs[i]; | ||
858 | |||
859 | slab->slab_size = SZ_4K * (1 << i); | ||
860 | INIT_LIST_HEAD(&slab->empty); | ||
861 | INIT_LIST_HEAD(&slab->partial); | ||
862 | INIT_LIST_HEAD(&slab->full); | ||
863 | slab->nr_empty = 0; | ||
864 | slab->nr_partial = 0; | ||
865 | slab->nr_full = 0; | ||
866 | } | ||
867 | |||
868 | return 0; | ||
869 | } | ||
870 | |||
871 | int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | ||
872 | const char *name, u64 base, u64 length, | ||
873 | u64 blk_size, u64 flags) | ||
874 | { | ||
875 | struct nvgpu_page_allocator *a; | ||
876 | char buddy_name[sizeof(__a->name)]; | ||
877 | int err; | ||
878 | |||
879 | mutex_lock(&meta_data_cache_lock); | ||
880 | if (!page_alloc_cache) | ||
881 | page_alloc_cache = KMEM_CACHE(nvgpu_page_alloc, 0); | ||
882 | if (!page_alloc_chunk_cache) | ||
883 | page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); | ||
884 | if (!page_alloc_slab_page_cache) | ||
885 | page_alloc_slab_page_cache = | ||
886 | KMEM_CACHE(page_alloc_slab_page, 0); | ||
887 | mutex_unlock(&meta_data_cache_lock); | ||
888 | if (!page_alloc_cache || !page_alloc_chunk_cache || | ||
889 | !page_alloc_slab_page_cache) | ||
890 | return -ENOMEM; | ||
891 | |||
892 | if (blk_size < SZ_4K || !is_power_of_2(blk_size)) | ||
893 | return -EINVAL; | ||
894 | |||
895 | a = kzalloc(sizeof(struct nvgpu_page_allocator), GFP_KERNEL); | ||
896 | if (!a) | ||
897 | return -ENOMEM; | ||
898 | |||
899 | err = __nvgpu_alloc_common_init(__a, name, a, false, &page_ops); | ||
900 | if (err) | ||
901 | goto fail; | ||
902 | |||
903 | a->base = base; | ||
904 | a->length = length; | ||
905 | a->page_size = blk_size; | ||
906 | a->page_shift = __ffs(blk_size); | ||
907 | a->allocs = RB_ROOT; | ||
908 | a->owner = __a; | ||
909 | a->flags = flags; | ||
910 | |||
911 | if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) { | ||
912 | err = nvgpu_page_alloc_init_slabs(a); | ||
913 | if (err) | ||
914 | goto fail; | ||
915 | } | ||
916 | |||
917 | snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); | ||
918 | |||
919 | err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name, | ||
920 | base, length, blk_size, 0); | ||
921 | if (err) | ||
922 | goto fail; | ||
923 | |||
924 | nvgpu_init_alloc_debug(g, __a); | ||
925 | palloc_dbg(a, "New allocator: type page\n"); | ||
926 | palloc_dbg(a, " base 0x%llx\n", a->base); | ||
927 | palloc_dbg(a, " size 0x%llx\n", a->length); | ||
928 | palloc_dbg(a, " page_size 0x%llx\n", a->page_size); | ||
929 | palloc_dbg(a, " flags 0x%llx\n", a->flags); | ||
930 | palloc_dbg(a, " slabs: %d\n", a->nr_slabs); | ||
931 | |||
932 | return 0; | ||
933 | fail: | ||
934 | kfree(a->slabs); | ||
935 | kfree(a); | ||
936 | return err; | ||
937 | } | ||
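
/*
 * Typical bring-up (a sketch; "g" and the vidmem range are hypothetical):
 *
 *	struct nvgpu_allocator vidmem;
 *	int err = nvgpu_page_allocator_init(g, &vidmem, "vidmem",
 *					    SZ_1M, SZ_256M, SZ_64K,
 *					    GPU_ALLOC_4K_VIDMEM_PAGES);
 *
 * With a 64k page_size and GPU_ALLOC_4K_VIDMEM_PAGES set, requests of up to
 * 32k are served from the 4k/8k/16k/32k slabs; anything larger goes to the
 * underlying "vidmem-src" buddy allocator.
 */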