author    Alex Waterman <alexw@nvidia.com>	2016-06-30 13:22:48 -0400
committer Alex Waterman <alexw@nvidia.com>	2016-07-19 14:32:38 -0400
commit    ba2014d367339e77f8035e087c870032c510fd61 (patch)
tree      b2a1b8dc5a4055de30b2c48279c1263777cef1b5 /drivers/gpu
parent    f99e05006f9f60b6d0bb5c05a5cdddf5fea4cc81 (diff)
gpu: nvgpu: Implement a bitmap allocator
Implement a bitmap allocator for GPU use. This allocator is useful for managing memory (or resource) regions where the buddy allocator is not ideal. Some instances are small regions, or regions where the resource management must not call into the kernel's memory allocation routines (anything that ultimately calls alloc_page()).

The code path where avoiding alloc_page() matters most is the gpfifo submit path. To keep that routine fast and give it predictable time constraints, no alloc_page() calls can be made. The buddy allocator does not work here, since every time a buddy is allocated there is the possibility that a pair (or more) of buddy structs has to be created. Those allocations could in turn require a call into alloc_page() if there is not enough space left in the kmem_cache slab for the buddy structs.

Change-Id: Ia46fce62d4bdafcebbc153b21b515cb51641d241
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1176446
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/Makefile                        |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h   |  61
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.h         |  35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c  | 423
4 files changed, 519 insertions(+), 1 deletion(-)
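
For reference, here is a minimal usage sketch of the new interface (not part of the patch). The gk20a_alloc() and gk20a_free() calls are assumed to be the generic wrappers in gk20a_allocator.h that dispatch through the ops table, and the base, length and block size values are illustrative only:

    struct gk20a_allocator ctag_alloc;      /* hypothetical instance */
    u64 offset;
    int err;

    /* Manage a 1 MB region starting at 0x4000 in 4 KB blocks, no flags. */
    err = gk20a_bitmap_allocator_init(&ctag_alloc, "ctags",
                                      0x4000, 0x100000, 0x1000, 0);
    if (err)
            return err;

    /* A 0x6100 byte request rounds up to 7 blocks; 0 means failure. */
    offset = gk20a_alloc(&ctag_alloc, 0x6100);
    if (!offset)
            return -ENOMEM;

    /* The allocator's meta-data tree remembers the length for the free. */
    gk20a_free(&ctag_alloc, offset);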
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index a9a0f92f..1cf2ea37 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -53,6 +53,7 @@ nvgpu-y := \
 	gk20a/hal.o \
 	gk20a/hal_gk20a.o \
 	gk20a/gk20a_allocator.o \
+	gk20a/gk20a_allocator_bitmap.o \
 	gk20a/gk20a_allocator_buddy.o \
 	gk20a/cde_gk20a.o \
 	gk20a/platform_gk20a_generic.o \
diff --git a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
new file mode 100644
index 00000000..053a6425
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BITMAP_ALLOCATOR_PRIV_H
+#define BITMAP_ALLOCATOR_PRIV_H
+
+#include <linux/rbtree.h>
+
+struct gk20a_allocator;
+
+struct gk20a_bitmap_allocator {
+	struct gk20a_allocator *owner;
+
+	u64 base;		/* Base address of the space. */
+	u64 length;		/* Length of the space. */
+	u64 blk_size;		/* Size that corresponds to 1 bit. */
+	u64 blk_shift;		/* Bit shift to divide by blk_size. */
+	u64 num_bits;		/* Number of allocatable bits. */
+	u64 bit_offs;		/* Offset of bitmap. */
+
+	unsigned long *bitmap;	/* The actual bitmap! */
+	struct rb_root allocs;	/* Tree of outstanding allocations. */
+
+	u64 flags;
+
+	bool inited;
+
+	/* Statistics */
+	u64 nr_allocs;
+	u64 nr_fixed_allocs;
+	u64 bytes_alloced;
+	u64 bytes_freed;
+};
+
+struct gk20a_bitmap_alloc {
+	u64 base;
+	u64 length;
+	struct rb_node alloc_entry;	/* RB tree of allocations. */
+};
+
+static inline struct gk20a_bitmap_allocator *bitmap_allocator(
+	struct gk20a_allocator *a)
+{
+	return (struct gk20a_bitmap_allocator *)(a)->priv;
+}
+
+
+#endif
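
The blk_size, blk_shift and bit_offs fields above define how byte addresses map onto bit positions in the bitmap. A small worked example with illustrative numbers (not taken from the patch):

    u64 base      = 0x100000;               /* start of the managed space  */
    u64 blk_size  = 0x1000;                 /* one bit covers 4 KB         */
    u64 blk_shift = __ffs(blk_size);        /* 12, since blk_size == 2^12  */
    u64 bit_offs  = base >> blk_shift;      /* 0x100: bit 0 is the base    */

    u64 addr = 0x142000;                    /* an address inside the space */
    u64 bit  = (addr >> blk_shift) - bit_offs;   /* 0x142 - 0x100 = 0x42   */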
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 5d6b9426..26612bf9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -73,9 +73,35 @@ struct gk20a_allocator {
 };
 
 /*
- * Allocator flags.
+ * These are the available allocator flags.
+ *
+ * GPU_ALLOC_GVA_SPACE
+ *
+ * This flag makes sense for the buddy allocator only. It specifies that the
+ * allocator will be used for managing a GVA space. When managing GVA spaces
+ * special care has to be taken to ensure that allocations of similar PTE
+ * sizes are placed in the same PDE block. This allows the higher level
+ * code to skip defining both small and large PTE tables for every PDE. That
+ * can save considerable memory for address spaces that have a lot of
+ * allocations.
+ *
+ * GPU_ALLOC_NO_ALLOC_PAGE
+ *
+ * For any allocator that needs to manage a resource in a latency critical
+ * path this flag specifies that the allocator should not use any kmalloc()
+ * or similar functions during normal operation. Initialization routines
+ * may still use kmalloc(). This prevents the possibility of long waits for
+ * pages when using alloc_page(). Currently only the bitmap allocator
+ * implements this functionality.
+ *
+ * Also note that if you accept this flag then you must also define the
+ * free_fixed() function. Since no meta-data is allocated to help free
+ * allocations you need to keep track of the meta-data yourself (in this
+ * case the base and length of the allocation as opposed to just the base
+ * of the allocation).
 */
 #define GPU_ALLOC_GVA_SPACE 0x1
+#define GPU_ALLOC_NO_ALLOC_PAGE 0x2
 
 static inline void alloc_lock(struct gk20a_allocator *a)
 {
@@ -98,6 +124,13 @@ int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator,
 				const char *name, u64 base, u64 size,
 				u64 blk_size, u64 flags);
 
+/*
+ * Bitmap initializers.
+ */
+int gk20a_bitmap_allocator_init(struct gk20a_allocator *__a,
+				const char *name, u64 base, u64 length,
+				u64 blk_size, u64 flags);
+
 #define GPU_BALLOC_MAX_ORDER 31
 
 /*
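
The GPU_ALLOC_NO_ALLOC_PAGE comment above implies a contract on the caller: since the allocator keeps no per-allocation meta-data in that mode, the caller has to remember each allocation's base and length itself and release it through the fixed-free path. A hedged sketch of what that bookkeeping could look like (the struct and the gk20a_free_fixed() wrapper are assumptions, not shown in this patch):

    struct gpfifo_region {          /* hypothetical caller-side record */
            u64 base;
            u64 size;
    };

    static void release_region(struct gk20a_allocator *a,
                               struct gpfifo_region *r)
    {
            /* gk20a_free_fixed() is assumed to be the generic wrapper for
             * ops->free_fixed(); both base and length must be supplied. */
            gk20a_free_fixed(a, r->base, r->size);
    }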
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
new file mode 100644
index 00000000..2ddabc62
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include "gk20a_allocator.h"
+#include "bitmap_allocator_priv.h"
+
+static struct kmem_cache *meta_data_cache; /* slab cache for meta data. */
+static DEFINE_MUTEX(meta_data_cache_lock);
+
+static u64 gk20a_bitmap_alloc_length(struct gk20a_allocator *a)
+{
+	struct gk20a_bitmap_allocator *ba = a->priv;
+
+	return ba->length;
+}
+
+static u64 gk20a_bitmap_alloc_base(struct gk20a_allocator *a)
+{
+	struct gk20a_bitmap_allocator *ba = a->priv;
+
+	return ba->base;
+}
+
+static int gk20a_bitmap_alloc_inited(struct gk20a_allocator *a)
+{
+	struct gk20a_bitmap_allocator *ba = a->priv;
+
+	return ba->inited;
+}
+
+static u64 gk20a_bitmap_alloc_end(struct gk20a_allocator *a)
+{
+	struct gk20a_bitmap_allocator *ba = a->priv;
+
+	return ba->base + ba->length;
+}
+
+static u64 gk20a_bitmap_alloc_fixed(struct gk20a_allocator *__a,
+				    u64 base, u64 len)
+{
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+	u64 blks, offs, ret;
+
+	/* Compute the bit offset and make sure it's aligned to a block. */
+	offs = base >> a->blk_shift;
+	if (offs * a->blk_size != base)
+		return 0;
+
+	offs -= a->bit_offs;
+
+	blks = len >> a->blk_shift;
+	if (blks * a->blk_size != len)
+		blks++;
+
+	alloc_lock(__a);
+
+	/* Check if the space requested is already occupied. */
+	ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
+	if (ret != offs)
+		goto fail;
+
+	bitmap_set(a->bitmap, offs, blks);
+
+	a->bytes_alloced += blks * a->blk_size;
+	a->nr_fixed_allocs++;
+	alloc_unlock(__a);
+
+	alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
+		  base, len, blks, blks);
+	return base;
+
+fail:
+	alloc_unlock(__a);
+	alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
+	return 0;
+}
+
+/*
+ * Two possibilities for this function: either we are freeing a fixed allocation
+ * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
+ *
+ * Note: this function won't do much error checking. Thus you could really
+ * confuse the allocator if you misuse this function.
+ */
+static void gk20a_bitmap_free_fixed(struct gk20a_allocator *__a,
+				    u64 base, u64 len)
+{
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+	u64 blks, offs;
+
+	offs = base >> a->blk_shift;
+	if (WARN_ON(offs * a->blk_size != base))
+		return;
+
+	offs -= a->bit_offs;
+
+	blks = len >> a->blk_shift;
+	if (blks * a->blk_size != len)
+		blks++;
+
+	alloc_lock(__a);
+	bitmap_clear(a->bitmap, offs, blks);
+	a->bytes_freed += blks * a->blk_size;
+	alloc_unlock(__a);
+
+	alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
+		  base, len, blks, blks);
+}
+
+/*
+ * Add the passed alloc to the tree of stored allocations.
+ */
+static void insert_alloc_metadata(struct gk20a_bitmap_allocator *a,
+				  struct gk20a_bitmap_alloc *alloc)
+{
+	struct rb_node **new = &a->allocs.rb_node;
+	struct rb_node *parent = NULL;
+	struct gk20a_bitmap_alloc *tmp;
+
+	while (*new) {
+		tmp = container_of(*new, struct gk20a_bitmap_alloc,
+				   alloc_entry);
+
+		parent = *new;
+		if (alloc->base < tmp->base)
+			new = &((*new)->rb_left);
+		else if (alloc->base > tmp->base)
+			new = &((*new)->rb_right);
+		else {
+			WARN_ON("Duplicate entries in RB alloc tree!\n");
+			return;
+		}
+	}
+
+	rb_link_node(&alloc->alloc_entry, parent, new);
+	rb_insert_color(&alloc->alloc_entry, &a->allocs);
+}
+
+/*
+ * Find and remove meta-data from the outstanding allocations.
+ */
+static struct gk20a_bitmap_alloc *find_alloc_metadata(
+	struct gk20a_bitmap_allocator *a, u64 addr)
+{
+	struct rb_node *node = a->allocs.rb_node;
+	struct gk20a_bitmap_alloc *alloc;
+
+	while (node) {
+		alloc = container_of(node, struct gk20a_bitmap_alloc,
+				     alloc_entry);
+
+		if (addr < alloc->base)
+			node = node->rb_left;
+		else if (addr > alloc->base)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (!node)
+		return NULL;
+
+	rb_erase(node, &a->allocs);
+
+	return alloc;
+}
+
+/*
+ * Tree of alloc meta data stores the address of the alloc not the bit offset.
+ */
+static int __gk20a_bitmap_store_alloc(struct gk20a_bitmap_allocator *a,
+				      u64 addr, u64 len)
+{
+	struct gk20a_bitmap_alloc *alloc =
+		kmem_cache_alloc(meta_data_cache, GFP_KERNEL);
+
+	if (!alloc)
+		return -ENOMEM;
+
+	alloc->base = addr;
+	alloc->length = len;
+
+	insert_alloc_metadata(a, alloc);
+
+	return 0;
+}
+
+/*
+ * @len is in bytes. This routine will figure out the right number of bits to
+ * actually allocate. The return is the address in bytes as well.
+ */
+static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len)
+{
+	u64 blks, addr;
+	unsigned long offs, adjusted_offs;
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+
+	blks = len >> a->blk_shift;
+
+	if (blks * a->blk_size != len)
+		blks++;
+
+	alloc_lock(__a);
+
+	offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, 0, blks, 0);
+	if (offs >= a->num_bits)
+		goto fail;
+
+	bitmap_set(a->bitmap, offs, blks);
+
+	adjusted_offs = offs + a->bit_offs;
+	addr = ((u64)adjusted_offs) * a->blk_size;
+
+	/*
+	 * Only do meta-data storage if we are allowed to allocate storage for
+	 * that meta-data. The issue with using kmalloc() and friends is that
+	 * in latency and success critical paths an alloc_page() call can either
+	 * sleep for potentially a long time or, assuming GFP_ATOMIC, fail.
+	 * Since we might not want either of these possibilities assume that the
+	 * caller will keep what data it needs around to successfully free this
+	 * allocation.
+	 */
+	if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
+	    __gk20a_bitmap_store_alloc(a, addr, blks * a->blk_size))
+		goto fail_reset_bitmap;
+
+	alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
+		  addr, len, blks, blks);
+
+	a->nr_allocs++;
+	a->bytes_alloced += (blks * a->blk_size);
+	alloc_unlock(__a);
+
+	return addr;
+
+fail_reset_bitmap:
+	bitmap_clear(a->bitmap, offs, blks);
+fail:
+	alloc_unlock(__a);
+	alloc_dbg(__a, "Alloc failed!\n");
+	return 0;
+}
+
+static void gk20a_bitmap_free(struct gk20a_allocator *__a, u64 addr)
+{
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+	struct gk20a_bitmap_alloc *alloc = NULL;
+	u64 offs, adjusted_offs, blks;
+
+	alloc_lock(__a);
+
+	if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
+		WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
+		goto done;
+	}
+
+	alloc = find_alloc_metadata(a, addr);
+	if (!alloc)
+		goto done;
+
+	/*
+	 * Address comes from adjusted offset (i.e the bit offset with
+	 * a->bit_offs added. So start with that and then work out the real
+	 * offs into the bitmap.
+	 */
+	adjusted_offs = addr >> a->blk_shift;
+	offs = adjusted_offs - a->bit_offs;
+	blks = alloc->length >> a->blk_shift;
+
+	bitmap_clear(a->bitmap, offs, blks);
+	alloc_dbg(__a, "Free 0x%-10llx \n", addr);
+
+	a->bytes_freed += alloc->length;
+
+done:
+	kfree(alloc);
+	alloc_unlock(__a);
+}
+
+static void gk20a_bitmap_alloc_destroy(struct gk20a_allocator *__a)
+{
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+	struct gk20a_bitmap_alloc *alloc;
+	struct rb_node *node;
+
+	/*
+	 * Kill any outstanding allocations.
+	 */
+	while ((node = rb_first(&a->allocs)) != NULL) {
+		alloc = container_of(node, struct gk20a_bitmap_alloc,
+				     alloc_entry);
+
+		rb_erase(node, &a->allocs);
+		kfree(alloc);
+	}
+
+	kfree(a->bitmap);
+	kfree(a);
+}
+
+static void gk20a_bitmap_print_stats(struct gk20a_allocator *__a,
+				     struct seq_file *s, int lock)
+{
+	struct gk20a_bitmap_allocator *a = bitmap_allocator(__a);
+
+	__alloc_pstat(s, __a, "Bitmap allocator params:\n");
+	__alloc_pstat(s, __a, " start = 0x%llx\n", a->base);
+	__alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length);
+	__alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits);
+
+	/* Actual stats. */
+	__alloc_pstat(s, __a, "Stats:\n");
+	__alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs);
+	__alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs);
+	__alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced);
+	__alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed);
+	__alloc_pstat(s, __a, " Outstanding = 0x%llx\n",
+		      a->bytes_alloced - a->bytes_freed);
+}
+
+static const struct gk20a_allocator_ops bitmap_ops = {
+	.alloc = gk20a_bitmap_alloc,
+	.free = gk20a_bitmap_free,
+
+	.alloc_fixed = gk20a_bitmap_alloc_fixed,
+	.free_fixed = gk20a_bitmap_free_fixed,
+
+	.base = gk20a_bitmap_alloc_base,
+	.length = gk20a_bitmap_alloc_length,
+	.end = gk20a_bitmap_alloc_end,
+	.inited = gk20a_bitmap_alloc_inited,
+
+	.fini = gk20a_bitmap_alloc_destroy,
+
+	.print_stats = gk20a_bitmap_print_stats,
+};
+
+
+int gk20a_bitmap_allocator_init(struct gk20a_allocator *__a,
+				const char *name, u64 base, u64 length,
+				u64 blk_size, u64 flags)
+{
+	int err;
+	struct gk20a_bitmap_allocator *a;
+
+	mutex_lock(&meta_data_cache_lock);
+	if (!meta_data_cache)
+		meta_data_cache = KMEM_CACHE(gk20a_bitmap_alloc, 0);
+	mutex_unlock(&meta_data_cache_lock);
+
+	if (!meta_data_cache)
+		return -ENOMEM;
+
+	if (WARN_ON(blk_size & (blk_size - 1)))
+		return -EINVAL;
+
+	/*
+	 * blk_size must be a power-of-2; base length also need to be aligned
+	 * to blk_size.
+	 */
+	if (blk_size & (blk_size - 1) ||
+	    base & (blk_size - 1) || length & (blk_size - 1))
+		return -EINVAL;
+
+	if (base == 0) {
+		base = blk_size;
+		length -= blk_size;
+	}
+
+	a = kzalloc(sizeof(struct gk20a_bitmap_allocator), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = __gk20a_alloc_common_init(__a, name, a, false, &bitmap_ops);
+	if (err)
+		goto fail;
+
+	a->base = base;
+	a->length = length;
+	a->blk_size = blk_size;
+	a->blk_shift = __ffs(a->blk_size);
+	a->num_bits = length >> a->blk_shift;
+	a->bit_offs = a->base >> a->blk_shift;
+	a->flags = flags;
+
+	a->bitmap = kzalloc(sizeof(*a->bitmap) * BITS_TO_LONGS(a->num_bits),
+			    GFP_KERNEL);
+	if (!a->bitmap)
+		goto fail;
+
+	a->inited = true;
+
+	gk20a_init_alloc_debug(__a);
+	alloc_dbg(__a, "New allocator: type bitmap\n");
+	alloc_dbg(__a, " base 0x%llx\n", a->base);
+	alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
+	alloc_dbg(__a, " size 0x%llx\n", a->length);
+	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
+	alloc_dbg(__a, " flags 0x%llx\n", a->flags);
+
+	return 0;
+
+fail:
+	kfree(a);
+	return err;
+}
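
One idiom in gk20a_bitmap_alloc_fixed() is worth calling out: bitmap_find_next_zero_area() started at the requested offset returns that same offset only if the whole requested range is clear, so a single call doubles as an "is this range free?" test. A minimal stand-alone sketch of that check, using the kernel bitmap API with a hypothetical helper name:

    #include <linux/bitmap.h>

    /* Claim [offs, offs + blks) only if every bit in the range is clear;
     * mirrors the check in gk20a_bitmap_alloc_fixed() above. */
    static bool claim_range(unsigned long *map, unsigned long nbits,
                            unsigned long offs, unsigned long blks)
    {
            if (bitmap_find_next_zero_area(map, nbits, offs, blks, 0) != offs)
                    return false;   /* part of the range is already in use */

            bitmap_set(map, offs, blks);
            return true;
    }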