path: root/drivers/gpu/nvgpu/common/mm
author     Alex Waterman <alexw@nvidia.com>                     2016-12-20 16:55:48 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-01-09 15:33:16 -0500
commit     6df3992b60959d32c7113cb77e131a2547174f3a (patch)
tree       efbdc9e6ccd2330d5c469ca0783ecb0137da8fc4 /drivers/gpu/nvgpu/common/mm
parent     e229514bece5a109cdbfe263f6329efe987e5939 (diff)
gpu: nvgpu: Move allocators to common/mm/

Move the GPU allocators to common/mm/ since the allocators are common
code across all GPUs. Also rename the allocator code to move away from
gk20a_-prefixed structs and functions.

This caused one issue with the nvgpu_alloc() and nvgpu_free() functions:
there was a pair of functions for allocating with either kmalloc() or
vmalloc() depending on the size of the allocation. Those have now been
renamed to nvgpu_kalloc() and nvgpu_kfree().

Bug 1799159

Change-Id: Iddda92c013612bcb209847084ec85b8953002fa5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1274400
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
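
For illustration only, a minimal sketch of what the renamed helpers described above typically look like; the PAGE_SIZE cutoff and the is_vmalloc_addr() check are assumptions for this sketch, not code taken from the patch:

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Hypothetical sketch: kmalloc() for small requests, vmalloc() for large ones. */
static inline void *nvgpu_kalloc(size_t size)
{
        if (size > PAGE_SIZE)
                return vmalloc(size);
        return kmalloc(size, GFP_KERNEL);
}

/* Free with the matching primitive based on where the pointer came from. */
static inline void nvgpu_kfree(void *p)
{
        if (is_vmalloc_addr(p))
                vfree(p);
        else
                kfree(p);
}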
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator.c         443
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h     70
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c         1329
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h     192
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator.c       207
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h  121
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c          212
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c           937
8 files changed, 3511 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
new file mode 100644
index 00000000..6f267c85
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -0,0 +1,443 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/bitops.h>
20
21#include <nvgpu/allocator.h>
22
23#include "bitmap_allocator_priv.h"
24
25static struct kmem_cache *meta_data_cache; /* slab cache for meta data. */
26static DEFINE_MUTEX(meta_data_cache_lock);
27
28static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a)
29{
30 struct nvgpu_bitmap_allocator *ba = a->priv;
31
32 return ba->length;
33}
34
35static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a)
36{
37 struct nvgpu_bitmap_allocator *ba = a->priv;
38
39 return ba->base;
40}
41
42static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
43{
44 struct nvgpu_bitmap_allocator *ba = a->priv;
45 int inited = ba->inited;
46
47 rmb();
48 return inited;
49}
50
51static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
52{
53 struct nvgpu_bitmap_allocator *ba = a->priv;
54
55 return ba->base + ba->length;
56}
57
58static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
59 u64 base, u64 len)
60{
61 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
62 u64 blks, offs, ret;
63
64 /* Compute the bit offset and make sure it's aligned to a block. */
65 offs = base >> a->blk_shift;
66 if (offs * a->blk_size != base)
67 return 0;
68
69 offs -= a->bit_offs;
70
71 blks = len >> a->blk_shift;
72 if (blks * a->blk_size != len)
73 blks++;
74
75 alloc_lock(__a);
76
77 /* Check if the space requested is already occupied. */
78 ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
79 if (ret != offs)
80 goto fail;
81
82 bitmap_set(a->bitmap, offs, blks);
83
84 a->bytes_alloced += blks * a->blk_size;
85 a->nr_fixed_allocs++;
86 alloc_unlock(__a);
87
88 alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
89 base, len, blks, blks);
90 return base;
91
92fail:
93 alloc_unlock(__a);
94 alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
95 return 0;
96}
97
98/*
99 * Two possibilities for this function: either we are freeing a fixed allocation
100 * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
101 *
102 * Note: this function won't do much error checking. Thus you could really
103 * confuse the allocator if you misuse this function.
104 */
105static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *__a,
106 u64 base, u64 len)
107{
108 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
109 u64 blks, offs;
110
111 offs = base >> a->blk_shift;
112 if (WARN_ON(offs * a->blk_size != base))
113 return;
114
115 offs -= a->bit_offs;
116
117 blks = len >> a->blk_shift;
118 if (blks * a->blk_size != len)
119 blks++;
120
121 alloc_lock(__a);
122 bitmap_clear(a->bitmap, offs, blks);
123 a->bytes_freed += blks * a->blk_size;
124 alloc_unlock(__a);
125
126 alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
127 base, len, blks, blks);
128}
129
130/*
131 * Add the passed alloc to the tree of stored allocations.
132 */
133static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a,
134 struct nvgpu_bitmap_alloc *alloc)
135{
136 struct rb_node **new = &a->allocs.rb_node;
137 struct rb_node *parent = NULL;
138 struct nvgpu_bitmap_alloc *tmp;
139
140 while (*new) {
141 tmp = container_of(*new, struct nvgpu_bitmap_alloc,
142 alloc_entry);
143
144 parent = *new;
145 if (alloc->base < tmp->base)
146 new = &((*new)->rb_left);
147 else if (alloc->base > tmp->base)
148 new = &((*new)->rb_right);
149 else {
150 WARN_ON("Duplicate entries in RB alloc tree!\n");
151 return;
152 }
153 }
154
155 rb_link_node(&alloc->alloc_entry, parent, new);
156 rb_insert_color(&alloc->alloc_entry, &a->allocs);
157}
158
159/*
160 * Find and remove meta-data from the outstanding allocations.
161 */
162static struct nvgpu_bitmap_alloc *find_alloc_metadata(
163 struct nvgpu_bitmap_allocator *a, u64 addr)
164{
165 struct rb_node *node = a->allocs.rb_node;
166 struct nvgpu_bitmap_alloc *alloc;
167
168 while (node) {
169 alloc = container_of(node, struct nvgpu_bitmap_alloc,
170 alloc_entry);
171
172 if (addr < alloc->base)
173 node = node->rb_left;
174 else if (addr > alloc->base)
175 node = node->rb_right;
176 else
177 break;
178 }
179
180 if (!node)
181 return NULL;
182
183 rb_erase(node, &a->allocs);
184
185 return alloc;
186}
187
188/*
189 * Tree of alloc meta data stores the address of the alloc not the bit offset.
190 */
191static int __nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
192 u64 addr, u64 len)
193{
194 struct nvgpu_bitmap_alloc *alloc =
195 kmem_cache_alloc(meta_data_cache, GFP_KERNEL);
196
197 if (!alloc)
198 return -ENOMEM;
199
200 alloc->base = addr;
201 alloc->length = len;
202
203 insert_alloc_metadata(a, alloc);
204
205 return 0;
206}
207
208/*
209 * @len is in bytes. This routine will figure out the right number of bits to
210 * actually allocate. The return is the address in bytes as well.
211 */
212static u64 nvgpu_bitmap_alloc(struct nvgpu_allocator *__a, u64 len)
213{
214 u64 blks, addr;
215 unsigned long offs, adjusted_offs, limit;
216 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
217
218 blks = len >> a->blk_shift;
219
220 if (blks * a->blk_size != len)
221 blks++;
222
223 alloc_lock(__a);
224
225 /*
226 * First look from next_blk and onwards...
227 */
228 offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
229 a->next_blk, blks, 0);
230 if (offs >= a->num_bits) {
231 /*
232 * If that didn't work try the remaining area. Since there can
233 * be available space that spans across a->next_blk we need to
234 * search up to the first set bit after that.
235 */
236 limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
237 offs = bitmap_find_next_zero_area(a->bitmap, limit,
238 0, blks, 0);
239 if (offs >= a->next_blk)
240 goto fail;
241 }
242
243 bitmap_set(a->bitmap, offs, blks);
244 a->next_blk = offs + blks;
245
246 adjusted_offs = offs + a->bit_offs;
247 addr = ((u64)adjusted_offs) * a->blk_size;
248
249 /*
250 * Only do meta-data storage if we are allowed to allocate storage for
251 * that meta-data. The issue with using kmalloc() and friends is that
252 * in latency and success critical paths an alloc_page() call can either
253 * sleep for potentially a long time or, assuming GFP_ATOMIC, fail.
254 * Since we might not want either of these possibilities assume that the
255 * caller will keep what data it needs around to successfully free this
256 * allocation.
257 */
258 if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
259 __nvgpu_bitmap_store_alloc(a, addr, blks * a->blk_size))
260 goto fail_reset_bitmap;
261
262 alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
263 addr, len, blks, blks);
264
265 a->nr_allocs++;
266 a->bytes_alloced += (blks * a->blk_size);
267 alloc_unlock(__a);
268
269 return addr;
270
271fail_reset_bitmap:
272 bitmap_clear(a->bitmap, offs, blks);
273fail:
274 a->next_blk = 0;
275 alloc_unlock(__a);
276 alloc_dbg(__a, "Alloc failed!\n");
277 return 0;
278}
279
280static void nvgpu_bitmap_free(struct nvgpu_allocator *__a, u64 addr)
281{
282 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
283 struct nvgpu_bitmap_alloc *alloc = NULL;
284 u64 offs, adjusted_offs, blks;
285
286 alloc_lock(__a);
287
288 if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
289 WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
290 goto done;
291 }
292
293 alloc = find_alloc_metadata(a, addr);
294 if (!alloc)
295 goto done;
296
297 /*
 298	 * Address comes from the adjusted offset (i.e. the bit offset with
 299	 * a->bit_offs added). So start with that and then work out the real
300 * offs into the bitmap.
301 */
302 adjusted_offs = addr >> a->blk_shift;
303 offs = adjusted_offs - a->bit_offs;
304 blks = alloc->length >> a->blk_shift;
305
306 bitmap_clear(a->bitmap, offs, blks);
307 alloc_dbg(__a, "Free 0x%-10llx\n", addr);
308
309 a->bytes_freed += alloc->length;
310
311done:
312 kfree(alloc);
313 alloc_unlock(__a);
314}
315
316static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *__a)
317{
318 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
319 struct nvgpu_bitmap_alloc *alloc;
320 struct rb_node *node;
321
322 /*
323 * Kill any outstanding allocations.
324 */
325 while ((node = rb_first(&a->allocs)) != NULL) {
326 alloc = container_of(node, struct nvgpu_bitmap_alloc,
327 alloc_entry);
328
329 rb_erase(node, &a->allocs);
330 kfree(alloc);
331 }
332
333 kfree(a->bitmap);
334 kfree(a);
335}
336
337static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *__a,
338 struct seq_file *s, int lock)
339{
340 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
341
342 __alloc_pstat(s, __a, "Bitmap allocator params:\n");
343 __alloc_pstat(s, __a, " start = 0x%llx\n", a->base);
344 __alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length);
345 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits);
346
347 /* Actual stats. */
348 __alloc_pstat(s, __a, "Stats:\n");
349 __alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs);
350 __alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs);
351 __alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced);
352 __alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed);
353 __alloc_pstat(s, __a, " Outstanding = 0x%llx\n",
354 a->bytes_alloced - a->bytes_freed);
355}
356
357static const struct nvgpu_allocator_ops bitmap_ops = {
358 .alloc = nvgpu_bitmap_alloc,
359 .free = nvgpu_bitmap_free,
360
361 .alloc_fixed = nvgpu_bitmap_alloc_fixed,
362 .free_fixed = nvgpu_bitmap_free_fixed,
363
364 .base = nvgpu_bitmap_alloc_base,
365 .length = nvgpu_bitmap_alloc_length,
366 .end = nvgpu_bitmap_alloc_end,
367 .inited = nvgpu_bitmap_alloc_inited,
368
369 .fini = nvgpu_bitmap_alloc_destroy,
370
371 .print_stats = nvgpu_bitmap_print_stats,
372};
373
374
375int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
376 const char *name, u64 base, u64 length,
377 u64 blk_size, u64 flags)
378{
379 int err;
380 struct nvgpu_bitmap_allocator *a;
381
382 mutex_lock(&meta_data_cache_lock);
383 if (!meta_data_cache)
384 meta_data_cache = KMEM_CACHE(nvgpu_bitmap_alloc, 0);
385 mutex_unlock(&meta_data_cache_lock);
386
387 if (!meta_data_cache)
388 return -ENOMEM;
389
390 if (WARN_ON(blk_size & (blk_size - 1)))
391 return -EINVAL;
392
393 /*
 394	 * blk_size must be a power-of-2; base and length also need to be aligned
395 * to blk_size.
396 */
397 if (blk_size & (blk_size - 1) ||
398 base & (blk_size - 1) || length & (blk_size - 1))
399 return -EINVAL;
400
401 if (base == 0) {
402 base = blk_size;
403 length -= blk_size;
404 }
405
406 a = kzalloc(sizeof(struct nvgpu_bitmap_allocator), GFP_KERNEL);
407 if (!a)
408 return -ENOMEM;
409
410 err = __nvgpu_alloc_common_init(__a, name, a, false, &bitmap_ops);
411 if (err)
412 goto fail;
413
414 a->base = base;
415 a->length = length;
416 a->blk_size = blk_size;
417 a->blk_shift = __ffs(a->blk_size);
418 a->num_bits = length >> a->blk_shift;
419 a->bit_offs = a->base >> a->blk_shift;
420 a->flags = flags;
421
422 a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap),
423 GFP_KERNEL);
424 if (!a->bitmap)
425 goto fail;
426
427 wmb();
428 a->inited = true;
429
430 nvgpu_init_alloc_debug(g, __a);
431 alloc_dbg(__a, "New allocator: type bitmap\n");
432 alloc_dbg(__a, " base 0x%llx\n", a->base);
433 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
434 alloc_dbg(__a, " size 0x%llx\n", a->length);
435 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
436 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
437
438 return 0;
439
440fail:
441 kfree(a);
442 return err;
443}
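
A minimal usage sketch of the bitmap allocator above, assuming the generic nvgpu_alloc()/nvgpu_free() wrappers (defined in nvgpu_allocator.c, not shown in this section) dispatch through the ops table registered by __nvgpu_alloc_common_init(), and assuming an nvgpu_alloc_destroy() teardown wrapper; this is an illustration, not code from the patch:

#include <linux/sizes.h>

#include <nvgpu/allocator.h>

/* Illustration only: manage a 1 MB window at 0x100000 in 4 KB blocks. */
static int example_bitmap_usage(struct gk20a *g)
{
        struct nvgpu_allocator na;
        u64 addr;
        int err;

        err = nvgpu_bitmap_allocator_init(g, &na, "example-bitmap",
                                          0x100000, SZ_1M, SZ_4K, 0);
        if (err)
                return err;

        addr = nvgpu_alloc(&na, SZ_64K);        /* assumed generic wrapper */
        if (addr)
                nvgpu_free(&na, addr);          /* same call used elsewhere in this diff */

        nvgpu_alloc_destroy(&na);               /* assumed wrapper around ops->fini() */
        return 0;
}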
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
new file mode 100644
index 00000000..9802b9db
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
@@ -0,0 +1,70 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef BITMAP_ALLOCATOR_PRIV_H
18#define BITMAP_ALLOCATOR_PRIV_H
19
20#include <linux/rbtree.h>
21
22struct nvgpu_allocator;
23
24struct nvgpu_bitmap_allocator {
25 struct nvgpu_allocator *owner;
26
27 u64 base; /* Base address of the space. */
28 u64 length; /* Length of the space. */
29 u64 blk_size; /* Size that corresponds to 1 bit. */
30 u64 blk_shift; /* Bit shift to divide by blk_size. */
31 u64 num_bits; /* Number of allocatable bits. */
32 u64 bit_offs; /* Offset of bitmap. */
33
34 /*
35 * Optimization for making repeated allocations faster. Keep track of
36 * the next bit after the most recent allocation. This is where the next
37 * search will start from. This should make allocation faster in cases
38 * where lots of allocations get made one after another. It shouldn't
39 * have a negative impact on the case where the allocator is fragmented.
40 */
41 u64 next_blk;
42
43 unsigned long *bitmap; /* The actual bitmap! */
44 struct rb_root allocs; /* Tree of outstanding allocations. */
45
46 u64 flags;
47
48 bool inited;
49
50 /* Statistics */
51 u64 nr_allocs;
52 u64 nr_fixed_allocs;
53 u64 bytes_alloced;
54 u64 bytes_freed;
55};
56
57struct nvgpu_bitmap_alloc {
58 u64 base;
59 u64 length;
60 struct rb_node alloc_entry; /* RB tree of allocations. */
61};
62
63static inline struct nvgpu_bitmap_allocator *bitmap_allocator(
64 struct nvgpu_allocator *a)
65{
66 return (struct nvgpu_bitmap_allocator *)(a)->priv;
67}
68
69
70#endif
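
To make the blk_size/blk_shift/bit_offs fields above concrete, here is a small illustration (not part of the patch) of the address-to-bit mapping the allocator relies on:

#include <linux/bitops.h>

/*
 * Illustration of the index math used by nvgpu_bitmap_alloc_fixed() and
 * nvgpu_bitmap_free(): with a power-of-two blk_size, blk_shift = __ffs(blk_size)
 * and an address maps to bit (addr >> blk_shift) - bit_offs.
 */
static unsigned long example_addr_to_bit(u64 addr, u64 base, u64 blk_size)
{
        u64 blk_shift = __ffs(blk_size);        /* e.g. __ffs(0x1000) == 12 */
        u64 bit_offs = base >> blk_shift;       /* bit index of the base address */

        /* e.g. base = 0x100000, blk_size = 0x1000, addr = 0x104000 -> bit 4 */
        return (addr >> blk_shift) - bit_offs;
}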
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
new file mode 100644
index 00000000..39a53801
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -0,0 +1,1329 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19
20#include <nvgpu/allocator.h>
21
22#include "gk20a/mm_gk20a.h"
23#include "gk20a/platform_gk20a.h"
24
25#include "buddy_allocator_priv.h"
26
27static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
28
29/* Some other buddy allocator functions. */
30static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
31 u64 addr);
32static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
33 struct nvgpu_buddy *b);
34static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
35 struct nvgpu_fixed_alloc *falloc);
36
37/*
38 * This function is not present in older kernel's list.h code.
39 */
40#ifndef list_last_entry
41#define list_last_entry(ptr, type, member) \
42 list_entry((ptr)->prev, type, member)
43#endif
44
45/*
46 * GPU buddy allocator for various address spaces.
47 *
48 * Current limitations:
49 * o A fixed allocation could potentially be made that borders PDEs with
50 * different PTE sizes. This would require that fixed buffer to have
51 * different sized PTEs for different parts of the allocation. Probably
52 * best to just require PDE alignment for fixed address allocs.
53 *
54 * o It is currently possible to make an allocator that has a buddy alignment
55 * out of sync with the PDE block size alignment. A simple example is a
56 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
 57 * which means each buddy corresponds to more than one actual GPU page. The
58 * best way to fix this is probably just require PDE blocksize alignment
59 * for the start of the address space. At the moment all allocators are
60 * easily PDE aligned so this hasn't been a problem.
61 */
62
63/*
64 * Pick a suitable maximum order for this allocator.
65 *
 66 * Heuristic: Just guessing that the best max order is the largest single
67 * block that will fit in the address space.
68 */
69static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a)
70{
71 u64 true_max_order = ilog2(a->blks);
72
73 if (a->max_order == 0) {
74 a->max_order = true_max_order;
75 return;
76 }
77
78 if (a->max_order > true_max_order)
79 a->max_order = true_max_order;
80 if (a->max_order > GPU_BALLOC_MAX_ORDER)
81 a->max_order = GPU_BALLOC_MAX_ORDER;
82}
83
84/*
 85 * Since we can only allocate in chunks of a->blk_size we need to trim off
86 * any excess data that is not aligned to a->blk_size.
87 */
88static void balloc_allocator_align(struct nvgpu_buddy_allocator *a)
89{
90 a->start = ALIGN(a->base, a->blk_size);
91 WARN_ON(a->start != a->base);
92 a->end = (a->base + a->length) & ~(a->blk_size - 1);
93 a->count = a->end - a->start;
94 a->blks = a->count >> a->blk_shift;
95}
96
97/*
98 * Pass NULL for parent if you want a top level buddy.
99 */
100static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a,
101 struct nvgpu_buddy *parent,
102 u64 start, u64 order)
103{
104 struct nvgpu_buddy *new_buddy;
105
106 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
107 if (!new_buddy)
108 return NULL;
109
110 memset(new_buddy, 0, sizeof(struct nvgpu_buddy));
111
112 new_buddy->parent = parent;
113 new_buddy->start = start;
114 new_buddy->order = order;
115 new_buddy->end = start + (1 << order) * a->blk_size;
116 new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;
117
118 return new_buddy;
119}
120
121static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a,
122 struct nvgpu_buddy *b,
123 struct list_head *list)
124{
125 if (buddy_is_in_list(b)) {
126 alloc_dbg(balloc_owner(a),
127 "Oops: adding added buddy (%llu:0x%llx)\n",
128 b->order, b->start);
129 BUG();
130 }
131
132 /*
133 * Add big PTE blocks to the tail, small to the head for GVA spaces.
134 * This lets the code that checks if there are available blocks check
135 * without cycling through the entire list.
136 */
137 if (a->flags & GPU_ALLOC_GVA_SPACE &&
138 b->pte_size == gmmu_page_size_big)
139 list_add_tail(&b->buddy_entry, list);
140 else
141 list_add(&b->buddy_entry, list);
142
143 buddy_set_in_list(b);
144}
145
146static void __balloc_buddy_list_rem(struct nvgpu_buddy_allocator *a,
147 struct nvgpu_buddy *b)
148{
149 if (!buddy_is_in_list(b)) {
150 alloc_dbg(balloc_owner(a),
151 "Oops: removing removed buddy (%llu:0x%llx)\n",
152 b->order, b->start);
153 BUG();
154 }
155
156 list_del_init(&b->buddy_entry);
157 buddy_clr_in_list(b);
158}
159
160/*
161 * Add a buddy to one of the buddy lists and deal with the necessary
162 * book keeping. Adds the buddy to the list specified by the buddy's order.
163 */
164static void balloc_blist_add(struct nvgpu_buddy_allocator *a,
165 struct nvgpu_buddy *b)
166{
167 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
168 a->buddy_list_len[b->order]++;
169}
170
171static void balloc_blist_rem(struct nvgpu_buddy_allocator *a,
172 struct nvgpu_buddy *b)
173{
174 __balloc_buddy_list_rem(a, b);
175 a->buddy_list_len[b->order]--;
176}
177
178static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len)
179{
180 if (len == 0)
181 return 0;
182
183 len--;
184 len >>= a->blk_shift;
185
186 return fls(len);
187}
188
189static u64 __balloc_max_order_in(struct nvgpu_buddy_allocator *a,
190 u64 start, u64 end)
191{
192 u64 size = (end - start) >> a->blk_shift;
193
194 if (size > 0)
195 return min_t(u64, ilog2(size), a->max_order);
196 else
197 return GPU_BALLOC_MAX_ORDER;
198}
199
200/*
201 * Initialize the buddy lists.
202 */
203static int balloc_init_lists(struct nvgpu_buddy_allocator *a)
204{
205 int i;
206 u64 bstart, bend, order;
207 struct nvgpu_buddy *buddy;
208
209 bstart = a->start;
210 bend = a->end;
211
212 /* First make sure the LLs are valid. */
213 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
214 INIT_LIST_HEAD(balloc_get_order_list(a, i));
215
216 while (bstart < bend) {
217 order = __balloc_max_order_in(a, bstart, bend);
218
219 buddy = balloc_new_buddy(a, NULL, bstart, order);
220 if (!buddy)
221 goto cleanup;
222
223 balloc_blist_add(a, buddy);
224 bstart += balloc_order_to_len(a, order);
225 }
226
227 return 0;
228
229cleanup:
230 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
231 if (!list_empty(balloc_get_order_list(a, i))) {
232 buddy = list_first_entry(balloc_get_order_list(a, i),
233 struct nvgpu_buddy, buddy_entry);
234 balloc_blist_rem(a, buddy);
235 kmem_cache_free(buddy_cache, buddy);
236 }
237 }
238
239 return -ENOMEM;
240}
241
242/*
243 * Clean up and destroy the passed allocator.
244 */
245static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
246{
247 int i;
248 struct rb_node *node;
249 struct nvgpu_buddy *bud;
250 struct nvgpu_fixed_alloc *falloc;
251 struct nvgpu_buddy_allocator *a = __a->priv;
252
253 alloc_lock(__a);
254
255 nvgpu_fini_alloc_debug(__a);
256
257 /*
258 * Free the fixed allocs first.
259 */
260 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
261 falloc = container_of(node,
262 struct nvgpu_fixed_alloc, alloced_entry);
263
264 rb_erase(node, &a->fixed_allocs);
265 __balloc_do_free_fixed(a, falloc);
266 }
267
268 /*
269 * And now free all outstanding allocations.
270 */
271 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
272 bud = container_of(node, struct nvgpu_buddy, alloced_entry);
273 balloc_free_buddy(a, bud->start);
274 balloc_blist_add(a, bud);
275 balloc_coalesce(a, bud);
276 }
277
278 /*
279 * Now clean up the unallocated buddies.
280 */
281 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
282 BUG_ON(a->buddy_list_alloced[i] != 0);
283
284 while (!list_empty(balloc_get_order_list(a, i))) {
285 bud = list_first_entry(balloc_get_order_list(a, i),
286 struct nvgpu_buddy, buddy_entry);
287 balloc_blist_rem(a, bud);
288 kmem_cache_free(buddy_cache, bud);
289 }
290
291 if (a->buddy_list_len[i] != 0) {
292 pr_info("Excess buddies!!! (%d: %llu)\n",
293 i, a->buddy_list_len[i]);
294 BUG();
295 }
296 if (a->buddy_list_split[i] != 0) {
297 pr_info("Excess split nodes!!! (%d: %llu)\n",
298 i, a->buddy_list_split[i]);
299 BUG();
300 }
301 if (a->buddy_list_alloced[i] != 0) {
302 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
303 i, a->buddy_list_alloced[i]);
304 BUG();
305 }
306 }
307
308 kfree(a);
309
310 alloc_unlock(__a);
311}
312
313/*
314 * Combine the passed buddy if possible. The pointer in @b may not be valid
315 * after this as the buddy may be freed.
316 *
317 * @a must be locked.
318 */
319static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
320 struct nvgpu_buddy *b)
321{
322 struct nvgpu_buddy *parent;
323
324 if (buddy_is_alloced(b) || buddy_is_split(b))
325 return;
326
327 /*
 328	 * If our buddy and I are both not allocated and not split then
329 * we can coalesce ourselves.
330 */
331 if (!b->buddy)
332 return;
333 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
334 return;
335
336 parent = b->parent;
337
338 balloc_blist_rem(a, b);
339 balloc_blist_rem(a, b->buddy);
340
341 buddy_clr_split(parent);
342 a->buddy_list_split[parent->order]--;
343 balloc_blist_add(a, parent);
344
345 /*
346 * Recursively coalesce as far as we can go.
347 */
348 balloc_coalesce(a, parent);
349
350 /* Clean up the remains. */
351 kmem_cache_free(buddy_cache, b->buddy);
352 kmem_cache_free(buddy_cache, b);
353}
354
355/*
356 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
357 *
358 * @a must be locked.
359 */
360static int balloc_split_buddy(struct nvgpu_buddy_allocator *a,
361 struct nvgpu_buddy *b, int pte_size)
362{
363 struct nvgpu_buddy *left, *right;
364 u64 half;
365
366 left = balloc_new_buddy(a, b, b->start, b->order - 1);
367 if (!left)
368 return -ENOMEM;
369
370 half = (b->end - b->start) / 2;
371
372 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
373 if (!right) {
374 kmem_cache_free(buddy_cache, left);
375 return -ENOMEM;
376 }
377
378 buddy_set_split(b);
379 a->buddy_list_split[b->order]++;
380
381 b->left = left;
382 b->right = right;
383 left->buddy = right;
384 right->buddy = left;
385 left->parent = b;
386 right->parent = b;
387
388 /* PTE considerations. */
389 if (a->flags & GPU_ALLOC_GVA_SPACE &&
390 left->order <= a->pte_blk_order) {
391 left->pte_size = pte_size;
392 right->pte_size = pte_size;
393 }
394
395 balloc_blist_rem(a, b);
396 balloc_blist_add(a, left);
397 balloc_blist_add(a, right);
398
399 return 0;
400}
401
402/*
403 * Place the passed buddy into the RB tree for allocated buddies. Never fails
404 * unless the passed entry is a duplicate which is a bug.
405 *
406 * @a must be locked.
407 */
408static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a,
409 struct nvgpu_buddy *b)
410{
411 struct rb_node **new = &(a->alloced_buddies.rb_node);
412 struct rb_node *parent = NULL;
413
414 while (*new) {
415 struct nvgpu_buddy *bud = container_of(*new, struct nvgpu_buddy,
416 alloced_entry);
417
418 parent = *new;
419 if (b->start < bud->start)
420 new = &((*new)->rb_left);
421 else if (b->start > bud->start)
422 new = &((*new)->rb_right);
423 else
424 BUG_ON("Duplicate entries in allocated list!\n");
425 }
426
427 rb_link_node(&b->alloced_entry, parent, new);
428 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
429
430 buddy_set_alloced(b);
431 a->buddy_list_alloced[b->order]++;
432}
433
434/*
435 * Remove the passed buddy from the allocated buddy RB tree. Returns the
436 * deallocated buddy for further processing.
437 *
438 * @a must be locked.
439 */
440static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
441 u64 addr)
442{
443 struct rb_node *node = a->alloced_buddies.rb_node;
444 struct nvgpu_buddy *bud;
445
446 while (node) {
447 bud = container_of(node, struct nvgpu_buddy, alloced_entry);
448
449 if (addr < bud->start)
450 node = node->rb_left;
451 else if (addr > bud->start)
452 node = node->rb_right;
453 else
454 break;
455 }
456
457 if (!node)
458 return NULL;
459
460 rb_erase(node, &a->alloced_buddies);
461 buddy_clr_alloced(bud);
462 a->buddy_list_alloced[bud->order]--;
463
464 return bud;
465}
466
467/*
468 * Find a suitable buddy for the given order and PTE type (big or little).
469 */
470static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
471 u64 order, int pte_size)
472{
473 struct nvgpu_buddy *bud;
474
475 if (order > a->max_order ||
476 list_empty(balloc_get_order_list(a, order)))
477 return NULL;
478
479 if (a->flags & GPU_ALLOC_GVA_SPACE &&
480 pte_size == gmmu_page_size_big)
481 bud = list_last_entry(balloc_get_order_list(a, order),
482 struct nvgpu_buddy, buddy_entry);
483 else
484 bud = list_first_entry(balloc_get_order_list(a, order),
485 struct nvgpu_buddy, buddy_entry);
486
487 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
488 bud->pte_size != pte_size)
489 return NULL;
490
491 return bud;
492}
493
494/*
495 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
496 * order buddies until we have a suitable buddy to allocate.
497 *
498 * For PDE grouping add an extra check to see if a buddy is suitable: that the
 500 * buddy exists in a PDE whose PTE size is reasonable.
500 *
501 * @a must be locked.
502 */
503static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a,
504 u64 order, int pte_size)
505{
506 u64 split_order;
507 struct nvgpu_buddy *bud = NULL;
508
509 split_order = order;
510 while (split_order <= a->max_order &&
511 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
512 split_order++;
513
514 /* Out of memory! */
515 if (!bud)
516 return 0;
517
518 while (bud->order != order) {
519 if (balloc_split_buddy(a, bud, pte_size))
520 return 0; /* No mem... */
521 bud = bud->left;
522 }
523
524 balloc_blist_rem(a, bud);
525 balloc_alloc_buddy(a, bud);
526
527 return bud->start;
528}
529
530/*
531 * See if the passed range is actually available for allocation. If so, then
532 * return 1, otherwise return 0.
533 *
 534 * TODO: Right now this uses the suboptimal approach of going through all
535 * outstanding allocations and checking their base/ends. This could be better.
536 */
537static int balloc_is_range_free(struct nvgpu_buddy_allocator *a,
538 u64 base, u64 end)
539{
540 struct rb_node *node;
541 struct nvgpu_buddy *bud;
542
543 node = rb_first(&a->alloced_buddies);
544 if (!node)
545 return 1; /* No allocs yet. */
546
547 bud = container_of(node, struct nvgpu_buddy, alloced_entry);
548
549 while (bud->start < end) {
550 if ((bud->start > base && bud->start < end) ||
551 (bud->end > base && bud->end < end))
552 return 0;
553
554 node = rb_next(node);
555 if (!node)
556 break;
557 bud = container_of(node, struct nvgpu_buddy, alloced_entry);
558 }
559
560 return 1;
561}
562
563static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a,
564 struct nvgpu_fixed_alloc *f)
565{
566 struct rb_node **new = &(a->fixed_allocs.rb_node);
567 struct rb_node *parent = NULL;
568
569 while (*new) {
570 struct nvgpu_fixed_alloc *falloc =
571 container_of(*new, struct nvgpu_fixed_alloc,
572 alloced_entry);
573
574 BUG_ON(!virt_addr_valid(falloc));
575
576 parent = *new;
577 if (f->start < falloc->start)
578 new = &((*new)->rb_left);
579 else if (f->start > falloc->start)
580 new = &((*new)->rb_right);
581 else
582 BUG_ON("Duplicate entries in allocated list!\n");
583 }
584
585 rb_link_node(&f->alloced_entry, parent, new);
586 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
587}
588
589/*
 590 * Remove the fixed allocation containing @addr from the fixed-alloc RB tree.
 591 * Returns the fixed allocation for further processing.
592 *
593 * @a must be locked.
594 */
595static struct nvgpu_fixed_alloc *balloc_free_fixed(
596 struct nvgpu_buddy_allocator *a, u64 addr)
597{
598 struct rb_node *node = a->fixed_allocs.rb_node;
599 struct nvgpu_fixed_alloc *falloc;
600
601 while (node) {
602 falloc = container_of(node,
603 struct nvgpu_fixed_alloc, alloced_entry);
604
605 if (addr < falloc->start)
606 node = node->rb_left;
607 else if (addr > falloc->start)
608 node = node->rb_right;
609 else
610 break;
611 }
612
613 if (!node)
614 return NULL;
615
616 rb_erase(node, &a->fixed_allocs);
617
618 return falloc;
619}
620
621/*
622 * Find the parent range - doesn't necessarily need the parent to actually exist
623 * as a buddy. Finding an existing parent comes later...
624 */
625static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
626 u64 base, u64 order,
627 u64 *pbase, u64 *porder)
628{
629 u64 base_mask;
630 u64 shifted_base = balloc_base_shift(a, base);
631
632 order++;
633 base_mask = ~((a->blk_size << order) - 1);
634
635 shifted_base &= base_mask;
636
637 *pbase = balloc_base_unshift(a, shifted_base);
638 *porder = order;
639}
640
641/*
642 * Makes a buddy at the passed address. This will make all parent buddies
643 * necessary for this buddy to exist as well.
644 */
645static struct nvgpu_buddy *__balloc_make_fixed_buddy(
646 struct nvgpu_buddy_allocator *a, u64 base, u64 order)
647{
648 struct nvgpu_buddy *bud = NULL;
649 struct list_head *order_list;
650 u64 cur_order = order, cur_base = base;
651
652 /*
653 * Algo:
654 * 1. Keep jumping up a buddy order until we find the real buddy that
655 * this buddy exists in.
656 * 2. Then work our way down through the buddy tree until we hit a dead
657 * end.
658 * 3. Start splitting buddies until we split to the one we need to
659 * make.
660 */
661 while (cur_order <= a->max_order) {
662 int found = 0;
663
664 order_list = balloc_get_order_list(a, cur_order);
665 list_for_each_entry(bud, order_list, buddy_entry) {
666 if (bud->start == cur_base) {
667 found = 1;
668 break;
669 }
670 }
671
672 if (found)
673 break;
674
675 __balloc_get_parent_range(a, cur_base, cur_order,
676 &cur_base, &cur_order);
677 }
678
679 if (cur_order > a->max_order) {
680 alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
681 return NULL;
682 }
683
684 /* Split this buddy as necessary until we get the target buddy. */
685 while (bud->start != base || bud->order != order) {
686 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
687 balloc_coalesce(a, bud);
688 return NULL;
689 }
690
691 if (base < bud->right->start)
692 bud = bud->left;
693 else
694 bud = bud->right;
695
696 }
697
698 return bud;
699}
700
701static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
702 struct nvgpu_fixed_alloc *falloc,
703 u64 base, u64 len)
704{
705 u64 shifted_base, inc_base;
706 u64 align_order;
707
708 shifted_base = balloc_base_shift(a, base);
709 if (shifted_base == 0)
710 align_order = __fls(len >> a->blk_shift);
711 else
712 align_order = min_t(u64,
713 __ffs(shifted_base >> a->blk_shift),
714 __fls(len >> a->blk_shift));
715
716 if (align_order > a->max_order) {
717 alloc_dbg(balloc_owner(a),
718 "Align order too big: %llu > %llu\n",
719 align_order, a->max_order);
720 return 0;
721 }
722
723 /*
724 * Generate a list of buddies that satisfy this allocation.
725 */
726 inc_base = shifted_base;
727 while (inc_base < (shifted_base + len)) {
728 u64 order_len = balloc_order_to_len(a, align_order);
729 u64 remaining;
730 struct nvgpu_buddy *bud;
731
732 bud = __balloc_make_fixed_buddy(a,
733 balloc_base_unshift(a, inc_base),
734 align_order);
735 if (!bud) {
736 alloc_dbg(balloc_owner(a),
737 "Fixed buddy failed: {0x%llx, %llu}!\n",
738 balloc_base_unshift(a, inc_base),
739 align_order);
740 goto err_and_cleanup;
741 }
742
743 balloc_blist_rem(a, bud);
744 balloc_alloc_buddy(a, bud);
745 __balloc_buddy_list_add(a, bud, &falloc->buddies);
746
747 /* Book keeping. */
748 inc_base += order_len;
749 remaining = (shifted_base + len) - inc_base;
750 align_order = __ffs(inc_base >> a->blk_shift);
751
752 /* If we don't have much left - trim down align_order. */
753 if (balloc_order_to_len(a, align_order) > remaining)
754 align_order = __balloc_max_order_in(a, inc_base,
755 inc_base + remaining);
756 }
757
758 return base;
759
760err_and_cleanup:
761 while (!list_empty(&falloc->buddies)) {
762 struct nvgpu_buddy *bud = list_first_entry(&falloc->buddies,
763 struct nvgpu_buddy,
764 buddy_entry);
765
766 __balloc_buddy_list_rem(a, bud);
767 balloc_free_buddy(a, bud->start);
768 kmem_cache_free(buddy_cache, bud);
769 }
770
771 return 0;
772}
773
774static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
775 struct nvgpu_fixed_alloc *falloc)
776{
777 struct nvgpu_buddy *bud;
778
779 while (!list_empty(&falloc->buddies)) {
780 bud = list_first_entry(&falloc->buddies,
781 struct nvgpu_buddy,
782 buddy_entry);
783 __balloc_buddy_list_rem(a, bud);
784
785 balloc_free_buddy(a, bud->start);
786 balloc_blist_add(a, bud);
787 a->bytes_freed += balloc_order_to_len(a, bud->order);
788
789 /*
 790		 * Attempt to defrag the allocation.
791 */
792 balloc_coalesce(a, bud);
793 }
794
795 kfree(falloc);
796}
797
798/*
799 * Allocate memory from the passed allocator.
800 */
801static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
802{
803 u64 order, addr;
804 int pte_size;
805 struct nvgpu_buddy_allocator *a = __a->priv;
806
807 nvgpu_alloc_trace_func();
808
809 alloc_lock(__a);
810
811 order = balloc_get_order(a, len);
812
813 if (order > a->max_order) {
814 alloc_unlock(__a);
815 alloc_dbg(balloc_owner(a), "Alloc fail\n");
816 nvgpu_alloc_trace_func_done();
817 return 0;
818 }
819
820 /*
821 * For now pass the base address of the allocator's region to
822 * __get_pte_size(). This ensures we get the right page size for
823 * the alloc but we don't have to know what the real address is
824 * going to be quite yet.
825 *
826 * TODO: once userspace supports a unified address space pass 0 for
827 * the base. This will make only 'len' affect the PTE size.
828 */
829 if (a->flags & GPU_ALLOC_GVA_SPACE)
830 pte_size = __get_pte_size(a->vm, a->base, len);
831 else
832 pte_size = BALLOC_PTE_SIZE_ANY;
833
834 addr = __balloc_do_alloc(a, order, pte_size);
835
836 if (addr) {
837 a->bytes_alloced += len;
838 a->bytes_alloced_real += balloc_order_to_len(a, order);
839 alloc_dbg(balloc_owner(a),
840 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
841 addr, order, len,
842 pte_size == gmmu_page_size_big ? "big" :
843 pte_size == gmmu_page_size_small ? "small" :
844 "NA/any");
845 } else {
846 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
847 }
848
849 a->alloc_made = 1;
850
851 alloc_unlock(__a);
852
853 nvgpu_alloc_trace_func_done();
854 return addr;
855}
856
857/*
858 * Requires @__a to be locked.
859 */
860static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
861 u64 base, u64 len)
862{
863 u64 ret, real_bytes = 0;
864 struct nvgpu_buddy *bud;
865 struct nvgpu_fixed_alloc *falloc = NULL;
866 struct nvgpu_buddy_allocator *a = __a->priv;
867
868 nvgpu_alloc_trace_func();
869
870 /* If base isn't aligned to an order 0 block, fail. */
871 if (base & (a->blk_size - 1))
872 goto fail;
873
874 if (len == 0)
875 goto fail;
876
877 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
878 if (!falloc)
879 goto fail;
880
881 INIT_LIST_HEAD(&falloc->buddies);
882 falloc->start = base;
883 falloc->end = base + len;
884
885 if (!balloc_is_range_free(a, base, base + len)) {
886 alloc_dbg(balloc_owner(a),
887 "Range not free: 0x%llx -> 0x%llx\n",
888 base, base + len);
889 goto fail_unlock;
890 }
891
892 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
893 if (!ret) {
894 alloc_dbg(balloc_owner(a),
895 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
896 base, base + len);
897 goto fail_unlock;
898 }
899
900 balloc_alloc_fixed(a, falloc);
901
902 list_for_each_entry(bud, &falloc->buddies, buddy_entry)
903 real_bytes += (bud->end - bud->start);
904
905 a->bytes_alloced += len;
906 a->bytes_alloced_real += real_bytes;
907
908 alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
909
910 nvgpu_alloc_trace_func_done();
911 return base;
912
913fail_unlock:
914 alloc_unlock(__a);
915fail:
916 kfree(falloc);
917 nvgpu_alloc_trace_func_done();
918 return 0;
919}
920
921/*
922 * Allocate a fixed address allocation. The address of the allocation is @base
923 * and the length is @len. This is not a typical buddy allocator operation and
 924 * as such has a high possibility of failure if the address space is heavily in
925 * use.
926 *
927 * Please do not use this function unless _absolutely_ necessary.
928 */
929static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
930 u64 base, u64 len)
931{
932 u64 alloc;
933 struct nvgpu_buddy_allocator *a = __a->priv;
934
935 alloc_lock(__a);
936 alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
937 a->alloc_made = 1;
938 alloc_unlock(__a);
939
940 return alloc;
941}
942
943/*
944 * Free the passed allocation.
945 */
946static void nvgpu_buddy_bfree(struct nvgpu_allocator *__a, u64 addr)
947{
948 struct nvgpu_buddy *bud;
949 struct nvgpu_fixed_alloc *falloc;
950 struct nvgpu_buddy_allocator *a = __a->priv;
951
952 nvgpu_alloc_trace_func();
953
954 if (!addr) {
955 nvgpu_alloc_trace_func_done();
956 return;
957 }
958
959 alloc_lock(__a);
960
961 /*
962 * First see if this is a fixed alloc. If not fall back to a regular
963 * buddy.
964 */
965 falloc = balloc_free_fixed(a, addr);
966 if (falloc) {
967 __balloc_do_free_fixed(a, falloc);
968 goto done;
969 }
970
971 bud = balloc_free_buddy(a, addr);
972 if (!bud)
973 goto done;
974
975 balloc_blist_add(a, bud);
976 a->bytes_freed += balloc_order_to_len(a, bud->order);
977
978 /*
 979	 * Attempt to defrag the allocation.
980 */
981 balloc_coalesce(a, bud);
982
983done:
984 alloc_unlock(__a);
985 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
986 nvgpu_alloc_trace_func_done();
987 return;
988}
989
990static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
991 struct nvgpu_alloc_carveout *co)
992{
993 struct nvgpu_alloc_carveout *tmp;
994 u64 co_base, co_end;
995
996 co_base = co->base;
997 co_end = co->base + co->length;
998
999 /*
1000 * Not the fastest approach but we should not have that many carveouts
1001 * for any reasonable allocator.
1002 */
1003 list_for_each_entry(tmp, &a->co_list, co_entry) {
1004 if ((co_base >= tmp->base &&
1005 co_base < (tmp->base + tmp->length)) ||
1006 (co_end >= tmp->base &&
1007 co_end < (tmp->base + tmp->length)))
1008 return false;
1009 }
1010
1011 return true;
1012}
1013
1014/*
1015 * Carveouts can only be reserved before any regular allocations have been
1016 * made.
1017 */
1018static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
1019 struct nvgpu_alloc_carveout *co)
1020{
1021 struct nvgpu_buddy_allocator *a = __a->priv;
1022 u64 addr;
1023 int err = 0;
1024
1025 if (co->base < a->start || (co->base + co->length) > a->end ||
1026 a->alloc_made)
1027 return -EINVAL;
1028
1029 alloc_lock(__a);
1030
1031 if (!nvgpu_buddy_reserve_is_possible(a, co)) {
1032 err = -EBUSY;
1033 goto done;
1034 }
1035
1036 /* Should not be possible to fail... */
1037 addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
1038 if (!addr) {
1039 err = -ENOMEM;
1040 pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
1041 goto done;
1042 }
1043
1044 list_add(&co->co_entry, &a->co_list);
1045
1046done:
1047 alloc_unlock(__a);
1048 return err;
1049}
1050
1051/*
 1052 * Carveouts can be released at any time.
1053 */
1054static void nvgpu_buddy_release_co(struct nvgpu_allocator *__a,
1055 struct nvgpu_alloc_carveout *co)
1056{
1057 alloc_lock(__a);
1058
1059 list_del_init(&co->co_entry);
1060 nvgpu_free(__a, co->base);
1061
1062 alloc_unlock(__a);
1063}
1064
1065static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a)
1066{
1067 struct nvgpu_buddy_allocator *ba = a->priv;
1068
1069 return ba->length;
1070}
1071
1072static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a)
1073{
1074 struct nvgpu_buddy_allocator *ba = a->priv;
1075
1076 return ba->start;
1077}
1078
1079static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
1080{
1081 struct nvgpu_buddy_allocator *ba = a->priv;
1082 int inited = ba->initialized;
1083
1084 rmb();
1085 return inited;
1086}
1087
1088static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
1089{
1090 struct nvgpu_buddy_allocator *ba = a->priv;
1091
1092 return ba->end;
1093}
1094
1095static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
1096{
1097 struct nvgpu_buddy_allocator *ba = a->priv;
1098 u64 space;
1099
1100 alloc_lock(a);
1101 space = ba->end - ba->start -
1102 (ba->bytes_alloced_real - ba->bytes_freed);
1103 alloc_unlock(a);
1104
1105 return space;
1106}
1107
1108/*
1109 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1110 * stats are printed to the kernel log. This lets this code be used for
1111 * debugging purposes internal to the allocator.
1112 */
1113static void nvgpu_buddy_print_stats(struct nvgpu_allocator *__a,
1114 struct seq_file *s, int lock)
1115{
1116 int i = 0;
1117 struct rb_node *node;
1118 struct nvgpu_fixed_alloc *falloc;
1119 struct nvgpu_alloc_carveout *tmp;
1120 struct nvgpu_buddy_allocator *a = __a->priv;
1121
1122 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1123 a->base, a->length, a->blk_size);
1124 __alloc_pstat(s, __a, "Internal params:\n");
1125 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1126 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1127 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1128 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1129 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1130
1131 if (lock)
1132 alloc_lock(__a);
1133
1134 if (!list_empty(&a->co_list)) {
1135 __alloc_pstat(s, __a, "\n");
1136 __alloc_pstat(s, __a, "Carveouts:\n");
1137 list_for_each_entry(tmp, &a->co_list, co_entry)
1138 __alloc_pstat(s, __a,
1139 " CO %2d: %-20s 0x%010llx + 0x%llx\n",
1140 i++, tmp->name, tmp->base, tmp->length);
1141 }
1142
1143 __alloc_pstat(s, __a, "\n");
1144 __alloc_pstat(s, __a, "Buddy blocks:\n");
1145 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1146 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1147
1148 for (i = a->max_order; i >= 0; i--) {
1149 if (a->buddy_list_len[i] == 0 &&
1150 a->buddy_list_alloced[i] == 0 &&
1151 a->buddy_list_split[i] == 0)
1152 continue;
1153
1154 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1155 a->buddy_list_len[i],
1156 a->buddy_list_alloced[i],
1157 a->buddy_list_split[i]);
1158 }
1159
1160 __alloc_pstat(s, __a, "\n");
1161
1162 for (node = rb_first(&a->fixed_allocs), i = 1;
1163 node != NULL;
1164 node = rb_next(node)) {
1165 falloc = container_of(node,
1166 struct nvgpu_fixed_alloc, alloced_entry);
1167
1168 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1169 i, falloc->start, falloc->end);
1170 }
1171
1172 __alloc_pstat(s, __a, "\n");
1173 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1174 a->bytes_alloced);
1175 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1176 a->bytes_alloced_real);
1177 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1178 a->bytes_freed);
1179
1180 if (lock)
1181 alloc_unlock(__a);
1182}
1183
1184static const struct nvgpu_allocator_ops buddy_ops = {
1185 .alloc = nvgpu_buddy_balloc,
1186 .free = nvgpu_buddy_bfree,
1187
1188 .alloc_fixed = nvgpu_balloc_fixed_buddy,
1189 /* .free_fixed not needed. */
1190
1191 .reserve_carveout = nvgpu_buddy_reserve_co,
1192 .release_carveout = nvgpu_buddy_release_co,
1193
1194 .base = nvgpu_buddy_alloc_base,
1195 .length = nvgpu_buddy_alloc_length,
1196 .end = nvgpu_buddy_alloc_end,
1197 .inited = nvgpu_buddy_alloc_inited,
1198 .space = nvgpu_buddy_alloc_space,
1199
1200 .fini = nvgpu_buddy_allocator_destroy,
1201
1202 .print_stats = nvgpu_buddy_print_stats,
1203};
1204
1205/*
1206 * Initialize a buddy allocator. Returns 0 on success. This allocator does
1207 * not necessarily manage bytes. It manages distinct ranges of resources. This
1208 * allows the allocator to work for things like comp_tags, semaphores, etc.
1209 *
1210 * @allocator: Ptr to an allocator struct to init.
1211 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
1212 * get PTE size for GVA spaces.
1213 * @name: Name of the allocator. Doesn't have to be static storage.
1214 * @base: The base address of the resource pool being managed.
1215 * @size: Number of resources in the pool.
1216 * @blk_size: Minimum number of resources to allocate at once. For things like
1217 * semaphores this is 1. For GVA this might be as much as 64k. This
1218 * corresponds to order 0. Must be power of 2.
1219 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
1220 * will try and pick a reasonable max order.
1221 * @flags: Extra flags necessary. See GPU_BALLOC_*.
1222 */
1223int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1224 struct vm_gk20a *vm, const char *name,
1225 u64 base, u64 size, u64 blk_size,
1226 u64 max_order, u64 flags)
1227{
1228 int err;
1229 u64 pde_size;
1230 struct nvgpu_buddy_allocator *a;
1231
1232 /* blk_size must be greater than 0 and a power of 2. */
1233 if (blk_size == 0)
1234 return -EINVAL;
1235 if (blk_size & (blk_size - 1))
1236 return -EINVAL;
1237
1238 if (max_order > GPU_BALLOC_MAX_ORDER)
1239 return -EINVAL;
1240
1241 /* If this is to manage a GVA space we need a VM. */
1242 if (flags & GPU_ALLOC_GVA_SPACE && !vm)
1243 return -EINVAL;
1244
1245 a = kzalloc(sizeof(struct nvgpu_buddy_allocator), GFP_KERNEL);
1246 if (!a)
1247 return -ENOMEM;
1248
1249 err = __nvgpu_alloc_common_init(__a, name, a, false, &buddy_ops);
1250 if (err)
1251 goto fail;
1252
1253 a->base = base;
1254 a->length = size;
1255 a->blk_size = blk_size;
1256 a->blk_shift = __ffs(blk_size);
1257 a->owner = __a;
1258
1259 /*
 1260	 * If base is 0 then modify base to be the size of one block so that we
1261 * can return errors by returning addr == 0.
1262 */
1263 if (a->base == 0) {
1264 a->base = a->blk_size;
1265 a->length -= a->blk_size;
1266 }
1267
1268 a->vm = vm;
1269 if (flags & GPU_ALLOC_GVA_SPACE) {
1270 pde_size = ((u64)vm->big_page_size) << 10;
1271 a->pte_blk_order = balloc_get_order(a, pde_size);
1272 }
1273
1274 /*
1275 * When we have a GVA space with big_pages enabled the size and base
1276 * must be PDE aligned. If big_pages are not enabled then this
1277 * requirement is not necessary.
1278 */
1279 if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages &&
1280 (base & ((vm->big_page_size << 10) - 1) ||
1281 size & ((vm->big_page_size << 10) - 1)))
1282 return -EINVAL;
1283
1284 a->flags = flags;
1285 a->max_order = max_order;
1286
1287 balloc_allocator_align(a);
1288 balloc_compute_max_order(a);
1289
1290 /* Shared buddy kmem_cache for all allocators. */
1291 if (!buddy_cache)
1292 buddy_cache = KMEM_CACHE(nvgpu_buddy, 0);
1293 if (!buddy_cache) {
1294 err = -ENOMEM;
1295 goto fail;
1296 }
1297
1298 a->alloced_buddies = RB_ROOT;
1299 a->fixed_allocs = RB_ROOT;
1300 INIT_LIST_HEAD(&a->co_list);
1301 err = balloc_init_lists(a);
1302 if (err)
1303 goto fail;
1304
1305 wmb();
1306 a->initialized = 1;
1307
1308 nvgpu_init_alloc_debug(g, __a);
1309 alloc_dbg(__a, "New allocator: type buddy\n");
1310 alloc_dbg(__a, " base 0x%llx\n", a->base);
1311 alloc_dbg(__a, " size 0x%llx\n", a->length);
1312 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
1313 alloc_dbg(__a, " max_order %llu\n", a->max_order);
1314 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
1315
1316 return 0;
1317
1318fail:
1319 kfree(a);
1320 return err;
1321}
1322
1323int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
1324 const char *name, u64 base, u64 size,
1325 u64 blk_size, u64 flags)
1326{
1327 return __nvgpu_buddy_allocator_init(g, a, NULL, name,
 1328			base, size, blk_size, 0, flags);
1329}
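
As a worked illustration (not part of the patch) of the order arithmetic used throughout buddy_allocator.c: a request of len bytes rounds up to order fls((len - 1) >> blk_shift), and an order-n buddy spans (1 << n) * blk_size bytes.

#include <linux/bitops.h>

/* Worked example of the balloc_get_order()/balloc_order_to_len() math. */
static void example_buddy_orders(void)
{
        u64 blk_size = 0x10000;                 /* 64 KB order-0 blocks */
        u64 blk_shift = __ffs(blk_size);        /* 16 */
        u64 len = 0x30000;                      /* 192 KB request */
        u64 order, covered;

        /* Same rounding as balloc_get_order(): fls((0x30000 - 1) >> 16) == fls(2) == 2. */
        order = fls((len - 1) >> blk_shift);

        /* An order-2 buddy covers (1 << 2) * 64 KB = 256 KB, enough for 192 KB. */
        covered = (1ULL << order) * blk_size;
        (void)covered;
}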
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h
new file mode 100644
index 00000000..50a11f14
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h
@@ -0,0 +1,192 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef BUDDY_ALLOCATOR_PRIV_H
18#define BUDDY_ALLOCATOR_PRIV_H
19
20#include <linux/list.h>
21#include <linux/rbtree.h>
22
23struct nvgpu_allocator;
24struct vm_gk20a;
25
26/*
27 * Each buddy is an element in a binary tree.
28 */
29struct nvgpu_buddy {
30 struct nvgpu_buddy *parent; /* Parent node. */
31 struct nvgpu_buddy *buddy; /* This node's buddy. */
32 struct nvgpu_buddy *left; /* Lower address sub-node. */
33 struct nvgpu_buddy *right; /* Higher address sub-node. */
34
35 struct list_head buddy_entry; /* List entry for various lists. */
36 struct rb_node alloced_entry; /* RB tree of allocations. */
37
38 u64 start; /* Start address of this buddy. */
39 u64 end; /* End address of this buddy. */
40 u64 order; /* Buddy order. */
41
42#define BALLOC_BUDDY_ALLOCED 0x1
43#define BALLOC_BUDDY_SPLIT 0x2
44#define BALLOC_BUDDY_IN_LIST 0x4
45 int flags; /* List of associated flags. */
46
47 /*
48 * Size of the PDE this buddy is using. This allows for grouping like
49 * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a
50 * enum except for the BALLOC_PTE_SIZE_ANY specifier.
51 */
52#define BALLOC_PTE_SIZE_ANY -1
53 int pte_size;
54};
55
56#define __buddy_flag_ops(flag, flag_up) \
57 static inline int buddy_is_ ## flag(struct nvgpu_buddy *b) \
58 { \
59 return b->flags & BALLOC_BUDDY_ ## flag_up; \
60 } \
61 static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \
62 { \
63 b->flags |= BALLOC_BUDDY_ ## flag_up; \
64 } \
65 static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \
66 { \
67 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
68 }
69
70/*
71 * int buddy_is_alloced(struct nvgpu_buddy *b);
72 * void buddy_set_alloced(struct nvgpu_buddy *b);
73 * void buddy_clr_alloced(struct nvgpu_buddy *b);
74 *
75 * int buddy_is_split(struct nvgpu_buddy *b);
76 * void buddy_set_split(struct nvgpu_buddy *b);
77 * void buddy_clr_split(struct nvgpu_buddy *b);
78 *
79 * int buddy_is_in_list(struct nvgpu_buddy *b);
80 * void buddy_set_in_list(struct nvgpu_buddy *b);
81 * void buddy_clr_in_list(struct nvgpu_buddy *b);
82 */
83__buddy_flag_ops(alloced, ALLOCED);
84__buddy_flag_ops(split, SPLIT);
85__buddy_flag_ops(in_list, IN_LIST);
86
87/*
88 * Keeps info for a fixed allocation.
89 */
90struct nvgpu_fixed_alloc {
91 struct list_head buddies; /* List of buddies. */
92 struct rb_node alloced_entry; /* RB tree of fixed allocations. */
93
94 u64 start; /* Start of fixed block. */
95 u64 end; /* End address. */
96};
97
98/*
99 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
100 * doesn't have to correspond to a byte. In some cases each unit is a more
101 * complex object such as a comp_tag line or the like.
102 *
103 * The max order is computed based on the size of the minimum order and the size
104 * of the address space.
105 *
106 * order_size is the size of an order 0 buddy.
107 */
108struct nvgpu_buddy_allocator {
109 struct nvgpu_allocator *owner; /* Owner of this buddy allocator. */
110 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
111
112 u64 base; /* Base address of the space. */
113 u64 length; /* Length of the space. */
114 u64 blk_size; /* Size of order 0 allocation. */
115 u64 blk_shift; /* Shift to divide by blk_size. */
116
117 /* Internal stuff. */
118 u64 start; /* Real start (aligned to blk_size). */
119 u64 end; /* Real end, trimmed if needed. */
120 u64 count; /* Count of objects in space. */
121 u64 blks; /* Count of blks in the space. */
122 u64 max_order; /* Specific maximum order. */
123
124 struct rb_root alloced_buddies; /* Outstanding allocations. */
125 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
126
127 struct list_head co_list;
128
129 /*
130 * Impose an upper bound on the maximum order.
131 */
132#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
133
134 struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
135 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
136 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
137 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
138
139 /*
140 * This is for when the allocator is managing a GVA space (the
141 * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires
142 * that we group like-sized allocations into PDE blocks.
143 */
144 u64 pte_blk_order;
145
146 int initialized;
147 int alloc_made; /* True after the first alloc. */
148
149 u64 flags;
150
151 u64 bytes_alloced;
152 u64 bytes_alloced_real;
153 u64 bytes_freed;
154};
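
/*
 * Illustrative sketch only, not part of this header: one way the max order
 * described above can fall out of the space size and the order-0 block size.
 * With blk_size = 64KiB and a 4GiB space there are 65536 order-0 buddies, so
 * the deepest order that fits is ilog2(65536) = 16, clamped to
 * GPU_BALLOC_MAX_ORDER. The helper name is hypothetical and assumes
 * <linux/log2.h> for ilog2().
 */
static inline u64 balloc_example_max_order(u64 length, u64 blk_size)
{
	u64 blks = length / blk_size;	/* Number of order-0 buddies. */

	return min_t(u64, ilog2(blks), GPU_BALLOC_MAX_ORDER);
}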
155
156static inline struct nvgpu_buddy_allocator *buddy_allocator(
157 struct nvgpu_allocator *a)
158{
159 return (struct nvgpu_buddy_allocator *)(a)->priv;
160}
161
162static inline struct list_head *balloc_get_order_list(
163 struct nvgpu_buddy_allocator *a, int order)
164{
165 return &a->buddy_list[order];
166}
167
168static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a,
169 int order)
170{
171	return ((u64)1 << order) * a->blk_size;
172}
173
174static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a,
175 u64 base)
176{
177 return base - a->start;
178}
179
180static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a,
181 u64 base)
182{
183 return base + a->start;
184}
185
186static inline struct nvgpu_allocator *balloc_owner(
187 struct nvgpu_buddy_allocator *a)
188{
189 return a->owner;
190}
191
192#endif
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
new file mode 100644
index 00000000..e3063a42
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -0,0 +1,207 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20#include <linux/atomic.h>
21
22#include <nvgpu/allocator.h>
23
24#include "lockless_allocator_priv.h"
25
26static u64 nvgpu_lockless_alloc_length(struct nvgpu_allocator *a)
27{
28 struct nvgpu_lockless_allocator *pa = a->priv;
29
30 return pa->length;
31}
32
33static u64 nvgpu_lockless_alloc_base(struct nvgpu_allocator *a)
34{
35 struct nvgpu_lockless_allocator *pa = a->priv;
36
37 return pa->base;
38}
39
40static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a)
41{
42 struct nvgpu_lockless_allocator *pa = a->priv;
43 int inited = pa->inited;
44
45 rmb();
46 return inited;
47}
48
49static u64 nvgpu_lockless_alloc_end(struct nvgpu_allocator *a)
50{
51 struct nvgpu_lockless_allocator *pa = a->priv;
52
53 return pa->base + pa->length;
54}
55
56static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len)
57{
58 struct nvgpu_lockless_allocator *pa = a->priv;
59 int head, new_head, ret;
60 u64 addr = 0;
61
62 if (len != pa->blk_size)
63 return 0;
64
65 head = ACCESS_ONCE(pa->head);
66 while (head >= 0) {
67 new_head = ACCESS_ONCE(pa->next[head]);
68 ret = cmpxchg(&pa->head, head, new_head);
69 if (ret == head) {
70 addr = pa->base + head * pa->blk_size;
71 atomic_inc(&pa->nr_allocs);
72 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
73 addr);
74 break;
75 }
76 head = ACCESS_ONCE(pa->head);
77 }
78 return addr;
79}
80
81static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr)
82{
83 struct nvgpu_lockless_allocator *pa = a->priv;
84 int head, ret;
85 u64 cur_idx, rem;
86
87 cur_idx = addr - pa->base;
88 rem = do_div(cur_idx, pa->blk_size);
89
90 while (1) {
91 head = ACCESS_ONCE(pa->head);
92 ACCESS_ONCE(pa->next[cur_idx]) = head;
93 ret = cmpxchg(&pa->head, head, cur_idx);
94 if (ret == head) {
95 atomic_dec(&pa->nr_allocs);
96 alloc_dbg(a, "Free node # %llu\n", cur_idx);
97 break;
98 }
99 }
100}
101
102static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
103{
104 struct nvgpu_lockless_allocator *pa = a->priv;
105
106 nvgpu_fini_alloc_debug(a);
107
108 vfree(pa->next);
109 kfree(pa);
110}
111
112static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a,
113 struct seq_file *s, int lock)
114{
115 struct nvgpu_lockless_allocator *pa = a->priv;
116
117 __alloc_pstat(s, a, "Lockless allocator params:\n");
118 __alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
119 __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
120
121 /* Actual stats. */
122 __alloc_pstat(s, a, "Stats:\n");
123 __alloc_pstat(s, a, " Number allocs = %d\n",
124 atomic_read(&pa->nr_allocs));
125 __alloc_pstat(s, a, " Number free = %d\n",
126 pa->nr_nodes - atomic_read(&pa->nr_allocs));
127}
128
129static const struct nvgpu_allocator_ops pool_ops = {
130 .alloc = nvgpu_lockless_alloc,
131 .free = nvgpu_lockless_free,
132
133 .base = nvgpu_lockless_alloc_base,
134 .length = nvgpu_lockless_alloc_length,
135 .end = nvgpu_lockless_alloc_end,
136 .inited = nvgpu_lockless_alloc_inited,
137
138 .fini = nvgpu_lockless_alloc_destroy,
139
140 .print_stats = nvgpu_lockless_print_stats,
141};
142
143int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
144 const char *name, u64 base, u64 length,
145 u64 blk_size, u64 flags)
146{
147 int i;
148 int err;
149 int nr_nodes;
150 u64 count, rem;
151 struct nvgpu_lockless_allocator *a;
152
153 if (!blk_size)
154 return -EINVAL;
155
156 /*
157	 * Ensure we have space for at least one node & there's no overflow.
158	 * In order to control memory footprint, we require count < INT_MAX.
159 */
160 count = length;
161 rem = do_div(count, blk_size);
162 if (!base || !count || count > INT_MAX)
163 return -EINVAL;
164
165 a = kzalloc(sizeof(struct nvgpu_lockless_allocator), GFP_KERNEL);
166 if (!a)
167 return -ENOMEM;
168
169 err = __nvgpu_alloc_common_init(__a, name, a, false, &pool_ops);
170 if (err)
171 goto fail;
172
173 a->next = vzalloc(sizeof(*a->next) * count);
174 if (!a->next) {
175 err = -ENOMEM;
176 goto fail;
177 }
178
179 /* chain the elements together to form the initial free list */
180 nr_nodes = (int)count;
181 for (i = 0; i < nr_nodes; i++)
182 a->next[i] = i + 1;
183 a->next[nr_nodes - 1] = -1;
184
185 a->base = base;
186 a->length = length;
187 a->blk_size = blk_size;
188 a->nr_nodes = nr_nodes;
189 a->flags = flags;
190 atomic_set(&a->nr_allocs, 0);
191
192 wmb();
193 a->inited = true;
194
195 nvgpu_init_alloc_debug(g, __a);
196 alloc_dbg(__a, "New allocator: type lockless\n");
197 alloc_dbg(__a, " base 0x%llx\n", a->base);
198 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
199 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
200 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
201
202 return 0;
203
204fail:
205 kfree(a);
206 return err;
207}
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
new file mode 100644
index 00000000..32421ac1
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17/*
18 * Basics:
19 *
20 * - Lockless memory allocator for fixed-size structures, whose
21 * size is defined up front at init time.
22 * - Memory footprint scales linearly w/ the number of structures in
23 * the pool. It is ~= sizeof(int) * N.
24 * - Memory is pre-allocated by the client. The allocator itself
25 * only computes the addresses for allocations.
26 * - Limit of INT_MAX nodes that the allocator can be responsible for.
27 *
28 * Implementation details:
29 *
30 * The allocator maintains a single list of free nodes. We allocate &
31 * free nodes from the head of the list. We rely on the cmpxchg() operator
32 * to maintain atomicity on the head.
33 *
34 * So, both allocs & frees are O(1)!!
35 *
36 * -- Definitions --
37 * Block Size - size of a single structure that this allocator will
38 * allocate.
39 * Node - one of the elements of size blk_size in the
40 * client-allocated buffer.
41 * Node Index - zero-based index of a node in the client-allocated
42 * contiguous buffer.
43 *
44 * -- Initial State --
45 * We maintain the following to track the state of the free list:
46 *
47 * 1) A "head" index to track the index of the first free node in the list
48 * 2) A "next" array to track the index of the next free node in the list
49 * for every node. So next[head] will give the index of the 2nd free
50 * element in the list.
51 *
52 * So, to begin with, the free list consists of all node indices, and each
53 * position N in the next array contains index N + 1:
54 *
55 * head = 0
56 * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
57 * free_list = 0->1->2->3->4->-1
58 *
59 * -- Allocations --
60 * 1) Read the current head (aka acq_head)
61 * 2) Read next[acq_head], to get the 2nd free element (aka new_head)
62 * 3) cmpxchg(&head, acq_head, new_head)
63 * 4) If it succeeds, compute the address of the node, based on
64 * base address, blk_size, & acq_head.
65 *
66 * head = 1;
67 * next = [1, 2, 3, 4, -1] : Example after allocating Node #0
68 * free_list = 1->2->3->4->-1
69 *
70 * head = 2;
71 * next = [1, 2, 3, 4, -1] : Example after allocating Node #1
72 * free_list = 2->3->4->-1
73 *
74 * -- Frees --
75 * 1) Based on the address to be freed, calculate the index of the node
76 * being freed (cur_idx)
77 * 2) Read the current head (old_head)
78 * 3) So the freed node is going to go at the head of the list, and we
79 * want to put the old_head after it. So next[cur_idx] = old_head
80 * 4) cmpxchg(&head, old_head, cur_idx)
81 *
82 * head = 0
83 * next = [2, 2, 3, 4, -1]
84 * free_list = 0->2->3->4->-1 : Example after freeing Node #0
85 *
86 * head = 1
87 * next = [2, 0, 3, 4, -1]
88 * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
89 */
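
/*
 * Minimal single-threaded sketch of the alloc/free walk described above,
 * using plain loads and stores instead of cmpxchg() so the index arithmetic
 * is easy to follow. The demo_* names are hypothetical and are not part of
 * the driver; the real, concurrent implementation lives in
 * lockless_allocator.c.
 */
struct demo_lockless {
	u64 base;		/* Base address of the client buffer. */
	u64 blk_size;		/* Size of one node. */
	int head;		/* Index of the first free node, -1 if none. */
	int next[5];		/* The 5-node example from the comment above. */
};

static inline u64 demo_alloc(struct demo_lockless *d)
{
	int idx = d->head;

	if (idx < 0)
		return 0;			/* Free list exhausted. */
	d->head = d->next[idx];			/* Pop the head node. */
	return d->base + idx * d->blk_size;
}

static inline void demo_free(struct demo_lockless *d, u64 addr)
{
	int idx = (addr - d->base) / d->blk_size;

	d->next[idx] = d->head;		/* Old head goes after the freed node. */
	d->head = idx;			/* Freed node becomes the new head. */
}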
90
91#ifndef LOCKLESS_ALLOCATOR_PRIV_H
92#define LOCKLESS_ALLOCATOR_PRIV_H
93
94struct nvgpu_allocator;
95
96struct nvgpu_lockless_allocator {
97 struct nvgpu_allocator *owner;
98
99 u64 base; /* Base address of the space. */
100 u64 length; /* Length of the space. */
101 u64 blk_size; /* Size of the structure being allocated */
102 int nr_nodes; /* Number of nodes available for allocation */
103
104 int *next; /* An array holding the next indices per node */
105 int head; /* Current node at the top of the stack */
106
107 u64 flags;
108
109 bool inited;
110
111 /* Statistics */
112 atomic_t nr_allocs;
113};
114
115static inline struct nvgpu_lockless_allocator *lockless_allocator(
116 struct nvgpu_allocator *a)
117{
118 return (struct nvgpu_lockless_allocator *)(a)->priv;
119}
120
121#endif
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
new file mode 100644
index 00000000..ebd779c0
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -0,0 +1,212 @@
1/*
2 * gk20a allocator
3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/kernel.h>
20#include <linux/slab.h>
21
22#include <nvgpu/allocator.h>
23
24#include "gk20a/gk20a.h"
25#include "gk20a/mm_gk20a.h"
26#include "gk20a/platform_gk20a.h"
27
28u32 nvgpu_alloc_tracing_on;
29
30u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
31{
32 if (a->ops->length)
33 return a->ops->length(a);
34
35 return 0;
36}
37
38u64 nvgpu_alloc_base(struct nvgpu_allocator *a)
39{
40 if (a->ops->base)
41 return a->ops->base(a);
42
43 return 0;
44}
45
46u64 nvgpu_alloc_initialized(struct nvgpu_allocator *a)
47{
48 if (!a->ops || !a->ops->inited)
49 return 0;
50
51 return a->ops->inited(a);
52}
53
54u64 nvgpu_alloc_end(struct nvgpu_allocator *a)
55{
56 if (a->ops->end)
57 return a->ops->end(a);
58
59 return 0;
60}
61
62u64 nvgpu_alloc_space(struct nvgpu_allocator *a)
63{
64 if (a->ops->space)
65 return a->ops->space(a);
66
67 return 0;
68}
69
70u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len)
71{
72 return a->ops->alloc(a, len);
73}
74
75void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
76{
77 a->ops->free(a, addr);
78}
79
80u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
81{
82 if (a->ops->alloc_fixed)
83 return a->ops->alloc_fixed(a, base, len);
84
85 return 0;
86}
87
88void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
89{
90 /*
91 * If this operation is not defined for the allocator then just do
92 * nothing. The alternative would be to fall back on the regular
93 * free but that may be harmful in unexpected ways.
94 */
95 if (a->ops->free_fixed)
96 a->ops->free_fixed(a, base, len);
97}
98
99int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
100 struct nvgpu_alloc_carveout *co)
101{
102 if (a->ops->reserve_carveout)
103 return a->ops->reserve_carveout(a, co);
104
105 return -ENODEV;
106}
107
108void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
109 struct nvgpu_alloc_carveout *co)
110{
111 if (a->ops->release_carveout)
112 a->ops->release_carveout(a, co);
113}
114
115void nvgpu_alloc_destroy(struct nvgpu_allocator *a)
116{
117 a->ops->fini(a);
118 memset(a, 0, sizeof(*a));
119}
120
121/*
122 * Handle the common init stuff for a nvgpu_allocator.
123 */
124int __nvgpu_alloc_common_init(struct nvgpu_allocator *a,
125 const char *name, void *priv, bool dbg,
126 const struct nvgpu_allocator_ops *ops)
127{
128 if (!ops)
129 return -EINVAL;
130
131 /*
132 * This is the bare minimum operations required for a sensible
133 * allocator.
134 */
135 if (!ops->alloc || !ops->free || !ops->fini)
136 return -EINVAL;
137
138 a->ops = ops;
139 a->priv = priv;
140 a->debug = dbg;
141
142 mutex_init(&a->lock);
143
144 strlcpy(a->name, name, sizeof(a->name));
145
146 return 0;
147}
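
/*
 * Minimal sketch of what a backend has to supply (illustration only; the
 * example_* names are hypothetical). Per the check above, .alloc, .free and
 * .fini are the only mandatory ops; any op left NULL simply makes the
 * corresponding wrapper above return 0 / -ENODEV or do nothing.
 */
static u64 example_alloc(struct nvgpu_allocator *a, u64 len)
{
	return 0;	/* A real backend would hand back an address here. */
}

static void example_free(struct nvgpu_allocator *a, u64 addr)
{
}

static void example_fini(struct nvgpu_allocator *a)
{
}

static const struct nvgpu_allocator_ops example_ops = {
	.alloc = example_alloc,
	.free  = example_free,
	.fini  = example_fini,
};

static int example_allocator_init(struct nvgpu_allocator *a)
{
	return __nvgpu_alloc_common_init(a, "example", NULL, false,
					 &example_ops);
}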
148
149void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
150 struct seq_file *s, int lock)
151{
152 __a->ops->print_stats(__a, s, lock);
153}
154
155#ifdef CONFIG_DEBUG_FS
156static int __alloc_show(struct seq_file *s, void *unused)
157{
158 struct nvgpu_allocator *a = s->private;
159
160 nvgpu_alloc_print_stats(a, s, 1);
161
162 return 0;
163}
164
165static int __alloc_open(struct inode *inode, struct file *file)
166{
167 return single_open(file, __alloc_show, inode->i_private);
168}
169
170static const struct file_operations __alloc_fops = {
171 .open = __alloc_open,
172 .read = seq_read,
173 .llseek = seq_lseek,
174 .release = single_release,
175};
176#endif
177
178void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
179{
180#ifdef CONFIG_DEBUG_FS
181 if (!g->debugfs_allocators)
182 return;
183
184 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
185 g->debugfs_allocators,
186 a, &__alloc_fops);
187#endif
188}
189
190void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
191{
192#ifdef CONFIG_DEBUG_FS
193 if (!IS_ERR_OR_NULL(a->debugfs_entry))
194 debugfs_remove(a->debugfs_entry);
195#endif
196}
197
198void nvgpu_alloc_debugfs_init(struct device *dev)
199{
200#ifdef CONFIG_DEBUG_FS
201 struct gk20a_platform *platform = dev_get_drvdata(dev);
202 struct dentry *gpu_root = platform->debugfs;
203 struct gk20a *g = get_gk20a(dev);
204
205 g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
206 if (IS_ERR_OR_NULL(g->debugfs_allocators))
207 return;
208
209 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
210 &nvgpu_alloc_tracing_on);
211#endif
212}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
new file mode 100644
index 00000000..c61b2238
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -0,0 +1,937 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/bitops.h>
20#include <linux/mm.h>
21
22#include <nvgpu/allocator.h>
23#include <nvgpu/page_allocator.h>
24
25#include "buddy_allocator_priv.h"
26
27#define palloc_dbg(a, fmt, arg...) \
28 alloc_dbg(palloc_owner(a), fmt, ##arg)
29
30static struct kmem_cache *page_alloc_cache;
31static struct kmem_cache *page_alloc_chunk_cache;
32static struct kmem_cache *page_alloc_slab_page_cache;
33static DEFINE_MUTEX(meta_data_cache_lock);
34
35/*
36 * Handle the book-keeping for these operations.
37 */
38static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
39 struct page_alloc_slab_page *page)
40{
41 BUG_ON(page->state != SP_NONE);
42 list_add(&page->list_entry, &slab->empty);
43 slab->nr_empty++;
44 page->state = SP_EMPTY;
45}
46static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
47 struct page_alloc_slab_page *page)
48{
49 BUG_ON(page->state != SP_NONE);
50 list_add(&page->list_entry, &slab->partial);
51 slab->nr_partial++;
52 page->state = SP_PARTIAL;
53}
54static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
55 struct page_alloc_slab_page *page)
56{
57 BUG_ON(page->state != SP_NONE);
58 list_add(&page->list_entry, &slab->full);
59 slab->nr_full++;
60 page->state = SP_FULL;
61}
62
63static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
64 struct page_alloc_slab_page *page)
65{
66 list_del_init(&page->list_entry);
67 slab->nr_empty--;
68 page->state = SP_NONE;
69}
70static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
71 struct page_alloc_slab_page *page)
72{
73 list_del_init(&page->list_entry);
74 slab->nr_partial--;
75 page->state = SP_NONE;
76}
77static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
78 struct page_alloc_slab_page *page)
79{
80 list_del_init(&page->list_entry);
81 slab->nr_full--;
82 page->state = SP_NONE;
83}
84
85static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
86{
87 struct nvgpu_page_allocator *va = a->priv;
88
89 return nvgpu_alloc_length(&va->source_allocator);
90}
91
92static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
93{
94 struct nvgpu_page_allocator *va = a->priv;
95
96 return nvgpu_alloc_base(&va->source_allocator);
97}
98
99static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
100{
101 struct nvgpu_page_allocator *va = a->priv;
102
103 return nvgpu_alloc_initialized(&va->source_allocator);
104}
105
106static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
107{
108 struct nvgpu_page_allocator *va = a->priv;
109
110 return nvgpu_alloc_end(&va->source_allocator);
111}
112
113static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
114{
115 struct nvgpu_page_allocator *va = a->priv;
116
117 return nvgpu_alloc_space(&va->source_allocator);
118}
119
120static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
121 struct nvgpu_alloc_carveout *co)
122{
123 struct nvgpu_page_allocator *va = a->priv;
124
125 return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
126}
127
128static void nvgpu_page_release_co(struct nvgpu_allocator *a,
129 struct nvgpu_alloc_carveout *co)
130{
131 struct nvgpu_page_allocator *va = a->priv;
132
133 nvgpu_alloc_release_carveout(&va->source_allocator, co);
134}
135
136static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
137 struct nvgpu_page_alloc *alloc,
138 bool free_buddy_alloc)
139{
140 struct page_alloc_chunk *chunk;
141
142 while (!list_empty(&alloc->alloc_chunks)) {
143 chunk = list_first_entry(&alloc->alloc_chunks,
144 struct page_alloc_chunk,
145 list_entry);
146 list_del(&chunk->list_entry);
147
148 if (free_buddy_alloc)
149 nvgpu_free(&a->source_allocator, chunk->base);
150 kfree(chunk);
151 }
152
153 kfree(alloc);
154}
155
156static int __insert_page_alloc(struct nvgpu_page_allocator *a,
157 struct nvgpu_page_alloc *alloc)
158{
159 struct rb_node **new = &a->allocs.rb_node;
160 struct rb_node *parent = NULL;
161
162 while (*new) {
163 struct nvgpu_page_alloc *tmp =
164 container_of(*new, struct nvgpu_page_alloc,
165 tree_entry);
166
167 parent = *new;
168 if (alloc->base < tmp->base) {
169 new = &((*new)->rb_left);
170 } else if (alloc->base > tmp->base) {
171 new = &((*new)->rb_right);
172 } else {
173 WARN(1, "Duplicate entries in allocated list!\n");
174 return 0;
175 }
176 }
177
178 rb_link_node(&alloc->tree_entry, parent, new);
179 rb_insert_color(&alloc->tree_entry, &a->allocs);
180
181 return 0;
182}
183
184static struct nvgpu_page_alloc *__find_page_alloc(
185 struct nvgpu_page_allocator *a,
186 u64 addr)
187{
188 struct rb_node *node = a->allocs.rb_node;
189 struct nvgpu_page_alloc *alloc;
190
191 while (node) {
192 alloc = container_of(node, struct nvgpu_page_alloc, tree_entry);
193
194 if (addr < alloc->base)
195 node = node->rb_left;
196 else if (addr > alloc->base)
197 node = node->rb_right;
198 else
199 break;
200 }
201
202 if (!node)
203 return NULL;
204
205 rb_erase(node, &a->allocs);
206
207 return alloc;
208}
209
210static struct page_alloc_slab_page *alloc_slab_page(
211 struct nvgpu_page_allocator *a,
212 struct page_alloc_slab *slab)
213{
214 struct page_alloc_slab_page *slab_page;
215
216 slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
217 if (!slab_page) {
218 palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
219 return ERR_PTR(-ENOMEM);
220 }
221
222 memset(slab_page, 0, sizeof(*slab_page));
223
224 slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size);
225 if (!slab_page->page_addr) {
226 kfree(slab_page);
227 palloc_dbg(a, "OOM: vidmem is full!\n");
228 return ERR_PTR(-ENOMEM);
229 }
230
231 INIT_LIST_HEAD(&slab_page->list_entry);
232 slab_page->slab_size = slab->slab_size;
233 slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
234 slab_page->nr_objects_alloced = 0;
235 slab_page->owner = slab;
236 slab_page->state = SP_NONE;
237
238 a->pages_alloced++;
239
240 palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
241 slab_page->page_addr, slab_page->slab_size);
242
243 return slab_page;
244}
245
246static void free_slab_page(struct nvgpu_page_allocator *a,
247 struct page_alloc_slab_page *slab_page)
248{
249 palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
250
251 BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
252 slab_page->nr_objects_alloced != 0 ||
253 slab_page->bitmap != 0);
254
255 nvgpu_free(&a->source_allocator, slab_page->page_addr);
256 a->pages_freed++;
257
258 kmem_cache_free(page_alloc_slab_page_cache, slab_page);
259}
260
261/*
262 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
263 * alloc_chunks list.
264 */
265static int __do_slab_alloc(struct nvgpu_page_allocator *a,
266 struct page_alloc_slab *slab,
267 struct nvgpu_page_alloc *alloc)
268{
269 struct page_alloc_slab_page *slab_page = NULL;
270 struct page_alloc_chunk *chunk;
271 unsigned long offs;
272
273 /*
274 * Check the partial and empty lists to see if we have some space
275	 * readily available. Take the slab_page out of whatever list it
276 * was in since it may be put back into a different list later.
277 */
278 if (!list_empty(&slab->partial)) {
279 slab_page = list_first_entry(&slab->partial,
280 struct page_alloc_slab_page,
281 list_entry);
282 del_slab_page_from_partial(slab, slab_page);
283 } else if (!list_empty(&slab->empty)) {
284 slab_page = list_first_entry(&slab->empty,
285 struct page_alloc_slab_page,
286 list_entry);
287 del_slab_page_from_empty(slab, slab_page);
288 }
289
290 if (!slab_page) {
291 slab_page = alloc_slab_page(a, slab);
292 if (IS_ERR(slab_page))
293 return PTR_ERR(slab_page);
294 }
295
296 /*
297 * We now have a slab_page. Do the alloc.
298 */
299 offs = bitmap_find_next_zero_area(&slab_page->bitmap,
300 slab_page->nr_objects,
301 0, 1, 0);
302 if (offs >= slab_page->nr_objects) {
303 WARN(1, "Empty/partial slab with no free objects?");
304
305 /* Add the buggy page to the full list... This isn't ideal. */
306 add_slab_page_to_full(slab, slab_page);
307 return -ENOMEM;
308 }
309
310 bitmap_set(&slab_page->bitmap, offs, 1);
311 slab_page->nr_objects_alloced++;
312
313 if (slab_page->nr_objects_alloced < slab_page->nr_objects)
314 add_slab_page_to_partial(slab, slab_page);
315 else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
316 add_slab_page_to_full(slab, slab_page);
317 else
318 BUG(); /* Should be impossible to hit this. */
319
320 /*
321 * Handle building the nvgpu_page_alloc struct. We expect one
322 * page_alloc_chunk to be present.
323 */
324 alloc->slab_page = slab_page;
325 alloc->nr_chunks = 1;
326 alloc->length = slab_page->slab_size;
327 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
328
329 chunk = list_first_entry(&alloc->alloc_chunks,
330 struct page_alloc_chunk, list_entry);
331 chunk->base = alloc->base;
332 chunk->length = alloc->length;
333
334 return 0;
335}
336
337/*
338 * Allocate from a slab instead of directly from the page allocator.
339 */
340static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
341 struct nvgpu_page_allocator *a, u64 len)
342{
343 int err, slab_nr;
344 struct page_alloc_slab *slab;
345 struct nvgpu_page_alloc *alloc = NULL;
346 struct page_alloc_chunk *chunk = NULL;
347
348 /*
349 * Align the length to a page and then divide by the page size (4k for
350 * this code). ilog2() of that then gets us the correct slab to use.
351 */
352 slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
353 slab = &a->slabs[slab_nr];
354
355 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
356 if (!alloc) {
357 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
358 goto fail;
359 }
360 chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
361 if (!chunk) {
362 palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
363 goto fail;
364 }
365
366 INIT_LIST_HEAD(&alloc->alloc_chunks);
367 list_add(&chunk->list_entry, &alloc->alloc_chunks);
368
369 err = __do_slab_alloc(a, slab, alloc);
370 if (err)
371 goto fail;
372
373 palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
374 len, slab_nr, alloc->base);
375 a->nr_slab_allocs++;
376
377 return alloc;
378
379fail:
380 kfree(alloc);
381 kfree(chunk);
382 return NULL;
383}
384
385static void __nvgpu_free_slab(struct nvgpu_page_allocator *a,
386 struct nvgpu_page_alloc *alloc)
387{
388 struct page_alloc_slab_page *slab_page = alloc->slab_page;
389 struct page_alloc_slab *slab = slab_page->owner;
390 enum slab_page_state new_state;
391 int offs;
392
393 offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
394 bitmap_clear(&slab_page->bitmap, offs, 1);
395
396 slab_page->nr_objects_alloced--;
397
398 if (slab_page->nr_objects_alloced == 0)
399 new_state = SP_EMPTY;
400 else
401 new_state = SP_PARTIAL;
402
403 /*
404 * Need to migrate the page to a different list.
405 */
406 if (new_state != slab_page->state) {
407 /* Delete - can't be in empty. */
408 if (slab_page->state == SP_PARTIAL)
409 del_slab_page_from_partial(slab, slab_page);
410 else
411 del_slab_page_from_full(slab, slab_page);
412
413 /* And add. */
414 if (new_state == SP_EMPTY) {
415 if (list_empty(&slab->empty))
416 add_slab_page_to_empty(slab, slab_page);
417 else
418 free_slab_page(a, slab_page);
419 } else {
420 add_slab_page_to_partial(slab, slab_page);
421 }
422 }
423
424 /*
425 * Now handle the page_alloc.
426 */
427 __nvgpu_free_pages(a, alloc, false);
428 a->nr_slab_frees++;
429
430 return;
431}
432
433/*
434 * Allocate physical pages. Since the underlying allocator is a buddy allocator,
435 * each individual allocation it hands back is contiguous. However, since the
436 * space may be fragmented, this allocator will collate several smaller
437 * contiguous chunks into a single allocation if necessary.
438 */
439static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
440 struct nvgpu_page_allocator *a, u64 pages)
441{
442 struct nvgpu_page_alloc *alloc;
443 struct page_alloc_chunk *c;
444 u64 max_chunk_len = pages << a->page_shift;
445 int i = 0;
446
447 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
448 if (!alloc)
449 goto fail;
450
451 memset(alloc, 0, sizeof(*alloc));
452
453 INIT_LIST_HEAD(&alloc->alloc_chunks);
454 alloc->length = pages << a->page_shift;
455
456 while (pages) {
457 u64 chunk_addr = 0;
458 u64 chunk_pages = (u64)1 << __fls(pages);
459 u64 chunk_len = chunk_pages << a->page_shift;
460
461 /*
462 * Take care of the possibility that the allocation must be
463 * contiguous. If this is not the first iteration then that
464 * means the first iteration failed to alloc the entire
465 * requested size. The buddy allocator guarantees any given
466 * single alloc is contiguous.
467 */
468 if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
469 goto fail_cleanup;
470
471 if (chunk_len > max_chunk_len)
472 chunk_len = max_chunk_len;
473
474 /*
475 * Keep attempting to allocate in smaller chunks until the alloc
476 * either succeeds or is smaller than the page_size of the
477		 * allocator (i.e. the allocator is OOM).
478 */
479 do {
480 chunk_addr = nvgpu_alloc(&a->source_allocator,
481 chunk_len);
482
483 /* Divide by 2 and try again */
484 if (!chunk_addr) {
485 palloc_dbg(a, "balloc failed: 0x%llx\n",
486 chunk_len);
487 chunk_len >>= 1;
488 max_chunk_len = chunk_len;
489 }
490 } while (!chunk_addr && chunk_len >= a->page_size);
491
492 chunk_pages = chunk_len >> a->page_shift;
493
494 if (!chunk_addr) {
495 palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
496 goto fail_cleanup;
497 }
498
499 c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
500 if (!c) {
501 nvgpu_free(&a->source_allocator, chunk_addr);
502 goto fail_cleanup;
503 }
504
505 pages -= chunk_pages;
506
507 c->base = chunk_addr;
508 c->length = chunk_len;
509 list_add(&c->list_entry, &alloc->alloc_chunks);
510
511 i++;
512 }
513
514 alloc->nr_chunks = i;
515 c = list_first_entry(&alloc->alloc_chunks,
516 struct page_alloc_chunk, list_entry);
517 alloc->base = c->base;
518
519 return alloc;
520
521fail_cleanup:
522 while (!list_empty(&alloc->alloc_chunks)) {
523 c = list_first_entry(&alloc->alloc_chunks,
524 struct page_alloc_chunk, list_entry);
525 list_del(&c->list_entry);
526 nvgpu_free(&a->source_allocator, c->base);
527 kfree(c);
528 }
529 kfree(alloc);
530fail:
531 return ERR_PTR(-ENOMEM);
532}
533
534static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
535 struct nvgpu_page_allocator *a, u64 len)
536{
537 struct nvgpu_page_alloc *alloc = NULL;
538 struct page_alloc_chunk *c;
539 u64 pages;
540 int i = 0;
541
542 pages = ALIGN(len, a->page_size) >> a->page_shift;
543
544 alloc = __do_nvgpu_alloc_pages(a, pages);
545 if (IS_ERR(alloc)) {
546 palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
547 pages << a->page_shift, pages);
548 return NULL;
549 }
550
551 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
552 pages << a->page_shift, pages, alloc->base);
553 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
554 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
555 i++, c->base, c->length);
556 }
557
558 return alloc;
559}
560
561/*
562 * Allocate enough pages to satisfy @len. Page size is determined at
563 * initialization of the allocator.
564 *
565 * The return value is actually a struct nvgpu_page_alloc pointer cast to a u64
566 * (unless GPU_ALLOC_NO_SCATTER_GATHER is set), since it doesn't make much sense
567 * to return the address of the first page in the list of pages (they could be
568 * discontiguous). This has precedent in the dma_alloc APIs, though it's really
569 * just an annoying artifact of the fact that the nvgpu_alloc() API returns a u64.
570 */
571static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len)
572{
573 struct nvgpu_page_allocator *a = page_allocator(__a);
574 struct nvgpu_page_alloc *alloc = NULL;
575 u64 real_len;
576
577 /*
578 * If we want contig pages we have to round up to a power of two. It's
579 * easier to do that here than in the buddy allocator.
580 */
581 real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
582 roundup_pow_of_two(len) : len;
583
584 alloc_lock(__a);
585 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
586 real_len <= (a->page_size / 2))
587 alloc = __nvgpu_alloc_slab(a, real_len);
588 else
589 alloc = __nvgpu_alloc_pages(a, real_len);
590
591 if (!alloc) {
592 alloc_unlock(__a);
593 return 0;
594 }
595
596 __insert_page_alloc(a, alloc);
597
598 a->nr_allocs++;
599 if (real_len > a->page_size / 2)
600 a->pages_alloced += alloc->length >> a->page_shift;
601 alloc_unlock(__a);
602
603 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
604 return alloc->base;
605 else
606 return (u64) (uintptr_t) alloc;
607}
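
/*
 * Illustrative caller-side sketch (not part of this patch): how the u64
 * returned by nvgpu_page_alloc() can be interpreted, per the comment above.
 * With GPU_ALLOC_NO_SCATTER_GATHER the value is the base address itself;
 * otherwise it encodes a struct nvgpu_page_alloc pointer whose chunk list
 * can be walked. The function name is hypothetical.
 */
static void example_dump_page_alloc(struct nvgpu_allocator *__a, u64 handle)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc;
	struct page_alloc_chunk *c;

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		/* The handle is simply the base address of the allocation. */
		palloc_dbg(a, "contig alloc @ 0x%llx\n", handle);
		return;
	}

	alloc = (struct nvgpu_page_alloc *)(uintptr_t)handle;
	list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
		palloc_dbg(a, "chunk 0x%010llx + 0x%llx\n", c->base, c->length);
}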
608
609/*
610 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
611 * if it's found.
612 */
613static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base)
614{
615 struct nvgpu_page_allocator *a = page_allocator(__a);
616 struct nvgpu_page_alloc *alloc;
617
618 alloc_lock(__a);
619
620 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
621 alloc = __find_page_alloc(a, base);
622 else
623 alloc = __find_page_alloc(a,
624 ((struct nvgpu_page_alloc *)(uintptr_t)base)->base);
625
626 if (!alloc) {
627 palloc_dbg(a, "Hrm, found no alloc?\n");
628 goto done;
629 }
630
631 a->nr_frees++;
632
633 palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
634 alloc->length, alloc->base);
635
636 /*
637 * Frees *alloc.
638 */
639 if (alloc->slab_page) {
640 __nvgpu_free_slab(a, alloc);
641 } else {
642 a->pages_freed += (alloc->length >> a->page_shift);
643 __nvgpu_free_pages(a, alloc, true);
644 }
645
646done:
647 alloc_unlock(__a);
648}
649
650static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
651 struct nvgpu_page_allocator *a, u64 base, u64 length)
652{
653 struct nvgpu_page_alloc *alloc;
654 struct page_alloc_chunk *c;
655
656 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
657 c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
658 if (!alloc || !c)
659 goto fail;
660
661 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length);
662 if (!alloc->base) {
663		WARN(1, "nvgpu: failed to fixed-alloc pages @ 0x%010llx", base);
664 goto fail;
665 }
666
667 alloc->nr_chunks = 1;
668 alloc->length = length;
669 INIT_LIST_HEAD(&alloc->alloc_chunks);
670
671 c->base = alloc->base;
672 c->length = length;
673 list_add(&c->list_entry, &alloc->alloc_chunks);
674
675 return alloc;
676
677fail:
678 kfree(c);
679 kfree(alloc);
680 return ERR_PTR(-ENOMEM);
681}
682
683static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
684 u64 base, u64 len)
685{
686 struct nvgpu_page_allocator *a = page_allocator(__a);
687 struct nvgpu_page_alloc *alloc = NULL;
688 struct page_alloc_chunk *c;
689 u64 aligned_len, pages;
690 int i = 0;
691
692 aligned_len = ALIGN(len, a->page_size);
693 pages = aligned_len >> a->page_shift;
694
695 alloc_lock(__a);
696
697 alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len);
698 if (IS_ERR(alloc)) {
699 alloc_unlock(__a);
700 return 0;
701 }
702
703 __insert_page_alloc(a, alloc);
704 alloc_unlock(__a);
705
706 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
707 alloc->base, aligned_len, pages);
708 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
709 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
710 i++, c->base, c->length);
711 }
712
713 a->nr_fixed_allocs++;
714 a->pages_alloced += pages;
715
716 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
717 return alloc->base;
718 else
719 return (u64) (uintptr_t) alloc;
720}
721
722static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a,
723 u64 base, u64 len)
724{
725 struct nvgpu_page_allocator *a = page_allocator(__a);
726 struct nvgpu_page_alloc *alloc;
727
728 alloc_lock(__a);
729
730 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
731 alloc = __find_page_alloc(a, base);
732 if (!alloc)
733 goto done;
734 } else {
735 alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
736 }
737
738 palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
739 alloc->base, alloc->length);
740
741 a->nr_fixed_frees++;
742 a->pages_freed += (alloc->length >> a->page_shift);
743
744 /*
745 * This works for the time being since the buddy allocator
746 * uses the same free function for both fixed and regular
747 * allocs. This would have to be updated if the underlying
748 * allocator were to change.
749 */
750 __nvgpu_free_pages(a, alloc, true);
751
752done:
753 alloc_unlock(__a);
754}
755
756static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a)
757{
758 struct nvgpu_page_allocator *a = page_allocator(__a);
759
760 alloc_lock(__a);
761 kfree(a);
762 __a->priv = NULL;
763 alloc_unlock(__a);
764}
765
766static void nvgpu_page_print_stats(struct nvgpu_allocator *__a,
767 struct seq_file *s, int lock)
768{
769 struct nvgpu_page_allocator *a = page_allocator(__a);
770 int i;
771
772 if (lock)
773 alloc_lock(__a);
774
775 __alloc_pstat(s, __a, "Page allocator:\n");
776 __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs);
777 __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
778 __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
779 __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
780 __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
781 __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
782 __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
783 __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
784 __alloc_pstat(s, __a, "\n");
785
786 /*
787 * Slab info.
788 */
789 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
790 __alloc_pstat(s, __a, "Slabs:\n");
791 __alloc_pstat(s, __a, " size empty partial full\n");
792 __alloc_pstat(s, __a, " ---- ----- ------- ----\n");
793
794 for (i = 0; i < a->nr_slabs; i++) {
795 struct page_alloc_slab *slab = &a->slabs[i];
796
797 __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
798 slab->slab_size,
799 slab->nr_empty, slab->nr_partial,
800 slab->nr_full);
801 }
802 __alloc_pstat(s, __a, "\n");
803 }
804
805 __alloc_pstat(s, __a, "Source alloc: %s\n",
806 a->source_allocator.name);
807 nvgpu_alloc_print_stats(&a->source_allocator, s, lock);
808
809 if (lock)
810 alloc_unlock(__a);
811}
812
813static const struct nvgpu_allocator_ops page_ops = {
814 .alloc = nvgpu_page_alloc,
815 .free = nvgpu_page_free,
816
817 .alloc_fixed = nvgpu_page_alloc_fixed,
818 .free_fixed = nvgpu_page_free_fixed,
819
820 .reserve_carveout = nvgpu_page_reserve_co,
821 .release_carveout = nvgpu_page_release_co,
822
823 .base = nvgpu_page_alloc_base,
824 .length = nvgpu_page_alloc_length,
825 .end = nvgpu_page_alloc_end,
826 .inited = nvgpu_page_alloc_inited,
827 .space = nvgpu_page_alloc_space,
828
829 .fini = nvgpu_page_allocator_destroy,
830
831 .print_stats = nvgpu_page_print_stats,
832};
833
834/*
835 * nr_slabs is computed as follows: divide page_size by 4096 to get the number
836 * of 4k pages in page_size. Then take the base 2 log of that to get the number
837 * of slabs. For a 64k page_size that works out like:
838 *
839 *   (64 * 1024) / (4 * 1024) = 16
840 * ilog2(16) = 4
841 *
842 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
843 */
844static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
845{
846 size_t nr_slabs = ilog2(a->page_size >> 12);
847 unsigned int i;
848
849 a->slabs = kcalloc(nr_slabs,
850 sizeof(struct page_alloc_slab),
851 GFP_KERNEL);
852 if (!a->slabs)
853 return -ENOMEM;
854 a->nr_slabs = nr_slabs;
855
856 for (i = 0; i < nr_slabs; i++) {
857 struct page_alloc_slab *slab = &a->slabs[i];
858
859 slab->slab_size = SZ_4K * (1 << i);
860 INIT_LIST_HEAD(&slab->empty);
861 INIT_LIST_HEAD(&slab->partial);
862 INIT_LIST_HEAD(&slab->full);
863 slab->nr_empty = 0;
864 slab->nr_partial = 0;
865 slab->nr_full = 0;
866 }
867
868 return 0;
869}
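
/*
 * Worked example (illustration only, assuming a 4k kernel page size as the
 * slab comment in __nvgpu_alloc_slab() notes): with a 64k page_size the loop
 * above builds four slabs of 4k, 8k, 16k and 32k objects. A slab is picked
 * via ilog2(PAGE_ALIGN(len) >> 12), so e.g. len = 6000 aligns to 8192,
 * 8192 >> 12 = 2, ilog2(2) = 1, i.e. the 8k slab. The helper below merely
 * restates that mapping; its name is hypothetical.
 */
static inline u32 example_slab_size_for_len(u64 len)
{
	int slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);

	return SZ_4K * (1 << slab_nr);
}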
870
871int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
872 const char *name, u64 base, u64 length,
873 u64 blk_size, u64 flags)
874{
875 struct nvgpu_page_allocator *a;
876 char buddy_name[sizeof(__a->name)];
877 int err;
878
879 mutex_lock(&meta_data_cache_lock);
880 if (!page_alloc_cache)
881 page_alloc_cache = KMEM_CACHE(nvgpu_page_alloc, 0);
882 if (!page_alloc_chunk_cache)
883 page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
884 if (!page_alloc_slab_page_cache)
885 page_alloc_slab_page_cache =
886 KMEM_CACHE(page_alloc_slab_page, 0);
887 mutex_unlock(&meta_data_cache_lock);
888
889 if (!page_alloc_cache || !page_alloc_chunk_cache)
890 return -ENOMEM;
891
892 if (blk_size < SZ_4K)
893 return -EINVAL;
894
895 a = kzalloc(sizeof(struct nvgpu_page_allocator), GFP_KERNEL);
896 if (!a)
897 return -ENOMEM;
898
899 err = __nvgpu_alloc_common_init(__a, name, a, false, &page_ops);
900 if (err)
901 goto fail;
902
903 a->base = base;
904 a->length = length;
905 a->page_size = blk_size;
906 a->page_shift = __ffs(blk_size);
907 a->allocs = RB_ROOT;
908 a->owner = __a;
909 a->flags = flags;
910
911 if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
912 err = nvgpu_page_alloc_init_slabs(a);
913 if (err)
914 goto fail;
915 }
916
917 snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
918
919 err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name,
920 base, length, blk_size, 0);
921 if (err)
922 goto fail;
923
924 nvgpu_init_alloc_debug(g, __a);
925 palloc_dbg(a, "New allocator: type page\n");
926 palloc_dbg(a, " base 0x%llx\n", a->base);
927 palloc_dbg(a, " size 0x%llx\n", a->length);
928 palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
929 palloc_dbg(a, " flags 0x%llx\n", a->flags);
930 palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
931
932 return 0;
933
934fail:
935 kfree(a);
936 return err;
937}