summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2016-06-24 17:12:24 -0400
committerAlex Waterman <alexw@nvidia.com>2016-07-19 14:21:46 -0400
commitb6569319c772d84087a0a1a6d7146bdcae8e9aab (patch)
tree16e7bae422279925301d9116b1e7f4d8aa656483 /drivers/gpu/nvgpu
parentf4b77e465648e87b19a7df4bb2a121ac8ac1b851 (diff)
gpu: nvgpu: Support multiple types of allocators
Support multiple types of allocation backends. Currently there is only one allocator implementation available: a buddy allocator. Buddy allocators have certain limitations, though. For one, the allocator requires metadata to be allocated from the kernel's system memory, which means a given buddy allocation can potentially sleep on a kmalloc() call. This patch makes it possible to add a new backend that avoids calling any dynamic system memory management routines. Bug 1781897 Change-Id: I98d6c8402c049942f13fee69c6901a166f177f65 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1172115 GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/as_gk20a.c9
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.c500
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.h169
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c112
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h7
-rw-r--r--drivers/gpu/nvgpu/gk20a/pmu_gk20a.c26
-rw-r--r--drivers/gpu/nvgpu/vgpu/mm_vgpu.c22
7 files changed, 554 insertions, 291 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 0571ca1f..8144ec6e 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -279,16 +279,17 @@ static int gk20a_as_ioctl_get_va_regions(
279 279
280 for (i = 0; i < write_entries; ++i) { 280 for (i = 0; i < write_entries; ++i) {
281 struct nvgpu_as_va_region region; 281 struct nvgpu_as_va_region region;
282 struct gk20a_allocator *vma = vm->fixed.init ? 282 struct gk20a_allocator *vma =
283 gk20a_alloc_initialized(&vm->fixed) ?
283 &vm->fixed : &vm->vma[i]; 284 &vm->fixed : &vm->vma[i];
284 285
285 memset(&region, 0, sizeof(struct nvgpu_as_va_region)); 286 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
286 287
287 region.page_size = vm->gmmu_page_sizes[i]; 288 region.page_size = vm->gmmu_page_sizes[i];
288 region.offset = vma->base; 289 region.offset = gk20a_alloc_base(vma);
289 /* No __aeabi_uldivmod() on some platforms... */ 290 /* No __aeabi_uldivmod() on some platforms... */
290 region.pages = (vma->end - vma->start) >> 291 region.pages = (gk20a_alloc_end(vma) -
291 ilog2(region.page_size); 292 gk20a_alloc_base(vma)) >> ilog2(region.page_size);
292 293
293 if (copy_to_user(user_region_ptr + i, &region, sizeof(region))) 294 if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
294 return -EFAULT; 295 return -EFAULT;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index d3a9202b..f2164768 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -17,43 +17,58 @@
17 */ 17 */
18 18
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/seq_file.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/debugfs.h>
23 21
24#include "platform_gk20a.h" 22#include "platform_gk20a.h"
25#include "gk20a_allocator.h" 23#include "gk20a_allocator.h"
26 24
27#include "mm_gk20a.h" 25#include "mm_gk20a.h"
28 26
29static struct dentry *balloc_debugfs_root; 27static struct dentry *gk20a_alloc_debugfs_root;
30 28
31static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ 29static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
32 30
33static u32 balloc_tracing_on; 31u32 gk20a_alloc_tracing_on;
34 32
35#define balloc_trace_func() \ 33#define gk20a_alloc_trace_func() \
36 do { \ 34 do { \
37 if (balloc_tracing_on) \ 35 if (gk20a_alloc_tracing_on) \
38 trace_printk("%s\n", __func__); \ 36 trace_printk("%s\n", __func__); \
39 } while (0) 37 } while (0)
40 38
41#define balloc_trace_func_done() \ 39#define gk20a_alloc_trace_func_done() \
42 do { \ 40 do { \
43 if (balloc_tracing_on) \ 41 if (gk20a_alloc_tracing_on) \
44 trace_printk("%s_done\n", __func__); \ 42 trace_printk("%s_done\n", __func__); \
45 } while (0) 43 } while (0)
46 44
47 45/*
48static void balloc_init_alloc_debug(struct gk20a_allocator *a); 46 * Buddy allocator implementation.
49static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, 47 */
50 int lock); 48static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len);
51static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, 49static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr);
50static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
51 u64 base, u64 len);
52static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a);
53static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a);
54static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a);
55static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a);
56
57static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a);
58static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
59 struct seq_file *s, int lock);
60
61/* Some other buddy allocator functions. */
62static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
52 u64 addr); 63 u64 addr);
53static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); 64static void balloc_coalesce(struct gk20a_buddy_allocator *a,
54static void __balloc_do_free_fixed(struct gk20a_allocator *a, 65 struct gk20a_buddy *b);
66static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
55 struct gk20a_fixed_alloc *falloc); 67 struct gk20a_fixed_alloc *falloc);
56 68
69/* Debugging. */
70static void gk20a_init_alloc_debug(struct gk20a_allocator *a);
71
57/* 72/*
58 * This function is not present in older kernel's list.h code. 73 * This function is not present in older kernel's list.h code.
59 */ 74 */
@@ -62,6 +77,23 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a,
62 list_entry((ptr)->prev, type, member) 77 list_entry((ptr)->prev, type, member)
63#endif 78#endif
64 79
80static const struct gk20a_allocator_ops buddy_ops = {
81 .alloc = gk20a_buddy_alloc,
82 .free = gk20a_buddy_free,
83
84 .alloc_fixed = gk20a_buddy_alloc_fixed,
85 /* .free_fixed not needed. */
86
87 .base = gk20a_buddy_alloc_base,
88 .length = gk20a_buddy_alloc_length,
89 .end = gk20a_buddy_alloc_end,
90 .inited = gk20a_buddy_alloc_inited,
91
92 .fini = gk20a_buddy_allocator_destroy,
93
94 .print_stats = gk20a_buddy_print_stats,
95};
96
65/* 97/*
66 * GPU buddy allocator for various address spaces. 98 * GPU buddy allocator for various address spaces.
67 * 99 *
@@ -80,13 +112,95 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a,
80 * easily PDE aligned so this hasn't been a problem. 112 * easily PDE aligned so this hasn't been a problem.
81 */ 113 */
82 114
115static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a)
116{
117 struct gk20a_buddy_allocator *ba = a->priv;
118
119 return ba->length;
120}
121
122static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a)
123{
124 struct gk20a_buddy_allocator *ba = a->priv;
125
126 return ba->start;
127}
128
129static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a)
130{
131 struct gk20a_buddy_allocator *ba = a->priv;
132
133 return ba->inited;
134}
135static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a)
136{
137 struct gk20a_buddy_allocator *ba = a->priv;
138
139 return ba->end;
140}
141
142u64 gk20a_alloc_length(struct gk20a_allocator *a)
143{
144 return a->ops->length(a);
145}
146
147u64 gk20a_alloc_base(struct gk20a_allocator *a)
148{
149 return a->ops->base(a);
150}
151
152u64 gk20a_alloc_initialized(struct gk20a_allocator *a)
153{
154 if (!a->ops)
155 return 0;
156
157 return a->ops->inited(a);
158}
159
160u64 gk20a_alloc_end(struct gk20a_allocator *a)
161{
162 return a->ops->end(a);
163}
164
165u64 gk20a_alloc(struct gk20a_allocator *a, u64 len)
166{
167 return a->ops->alloc(a, len);
168}
169
170void gk20a_free(struct gk20a_allocator *a, u64 addr)
171{
172 a->ops->free(a, addr);
173}
174
175u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
176{
177 return a->ops->alloc_fixed(a, base, len);
178}
179
180void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len)
181{
182 /*
183 * If this operation is not defined for the allocator then just do
184 * nothing. The alternative would be to fall back on the regular
185 * free but that may be harmful in unexpected ways.
186 */
187 if (a->ops->free_fixed)
188 a->ops->free_fixed(a, base, len);
189}
190
191void gk20a_alloc_destroy(struct gk20a_allocator *a)
192{
193 a->ops->fini(a);
194 memset(a, 0, sizeof(*a));
195}
196
83/* 197/*
84 * Pick a suitable maximum order for this allocator. 198 * Pick a suitable maximum order for this allocator.
85 * 199 *
86 * Hueristic: Just guessing that the best max order is the largest single 200 * Hueristic: Just guessing that the best max order is the largest single
87 * block that will fit in the address space. 201 * block that will fit in the address space.
88 */ 202 */
89static void balloc_compute_max_order(struct gk20a_allocator *a) 203static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
90{ 204{
91 u64 true_max_order = ilog2(a->blks); 205 u64 true_max_order = ilog2(a->blks);
92 206
@@ -105,9 +219,10 @@ static void balloc_compute_max_order(struct gk20a_allocator *a)
105 * Since we can only allocate in chucks of a->blk_size we need to trim off 219 * Since we can only allocate in chucks of a->blk_size we need to trim off
106 * any excess data that is not aligned to a->blk_size. 220 * any excess data that is not aligned to a->blk_size.
107 */ 221 */
108static void balloc_allocator_align(struct gk20a_allocator *a) 222static void balloc_allocator_align(struct gk20a_buddy_allocator *a)
109{ 223{
110 a->start = ALIGN(a->base, a->blk_size); 224 a->start = ALIGN(a->base, a->blk_size);
225 WARN_ON(a->start != a->base);
111 a->end = (a->base + a->length) & ~(a->blk_size - 1); 226 a->end = (a->base + a->length) & ~(a->blk_size - 1);
112 a->count = a->end - a->start; 227 a->count = a->end - a->start;
113 a->blks = a->count >> a->blk_shift; 228 a->blks = a->count >> a->blk_shift;
@@ -116,7 +231,7 @@ static void balloc_allocator_align(struct gk20a_allocator *a)
116/* 231/*
117 * Pass NULL for parent if you want a top level buddy. 232 * Pass NULL for parent if you want a top level buddy.
118 */ 233 */
119static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, 234static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a,
120 struct gk20a_buddy *parent, 235 struct gk20a_buddy *parent,
121 u64 start, u64 order) 236 u64 start, u64 order)
122{ 237{
@@ -136,13 +251,14 @@ static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a,
136 return new_buddy; 251 return new_buddy;
137} 252}
138 253
139static void __balloc_buddy_list_add(struct gk20a_allocator *a, 254static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a,
140 struct gk20a_buddy *b, 255 struct gk20a_buddy *b,
141 struct list_head *list) 256 struct list_head *list)
142{ 257{
143 if (buddy_is_in_list(b)) { 258 if (buddy_is_in_list(b)) {
144 balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", 259 alloc_dbg(balloc_owner(a),
145 b->order, b->start); 260 "Oops: adding added buddy (%llu:0x%llx)\n",
261 b->order, b->start);
146 BUG(); 262 BUG();
147 } 263 }
148 264
@@ -160,12 +276,13 @@ static void __balloc_buddy_list_add(struct gk20a_allocator *a,
160 buddy_set_in_list(b); 276 buddy_set_in_list(b);
161} 277}
162 278
163static void __balloc_buddy_list_rem(struct gk20a_allocator *a, 279static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a,
164 struct gk20a_buddy *b) 280 struct gk20a_buddy *b)
165{ 281{
166 if (!buddy_is_in_list(b)) { 282 if (!buddy_is_in_list(b)) {
167 balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", 283 alloc_dbg(balloc_owner(a),
168 b->order, b->start); 284 "Oops: removing removed buddy (%llu:0x%llx)\n",
285 b->order, b->start);
169 BUG(); 286 BUG();
170 } 287 }
171 288
@@ -177,19 +294,21 @@ static void __balloc_buddy_list_rem(struct gk20a_allocator *a,
177 * Add a buddy to one of the buddy lists and deal with the necessary 294 * Add a buddy to one of the buddy lists and deal with the necessary
178 * book keeping. Adds the buddy to the list specified by the buddy's order. 295 * book keeping. Adds the buddy to the list specified by the buddy's order.
179 */ 296 */
180static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) 297static void balloc_blist_add(struct gk20a_buddy_allocator *a,
298 struct gk20a_buddy *b)
181{ 299{
182 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); 300 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
183 a->buddy_list_len[b->order]++; 301 a->buddy_list_len[b->order]++;
184} 302}
185 303
186static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) 304static void balloc_blist_rem(struct gk20a_buddy_allocator *a,
305 struct gk20a_buddy *b)
187{ 306{
188 __balloc_buddy_list_rem(a, b); 307 __balloc_buddy_list_rem(a, b);
189 a->buddy_list_len[b->order]--; 308 a->buddy_list_len[b->order]--;
190} 309}
191 310
192static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) 311static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len)
193{ 312{
194 if (len == 0) 313 if (len == 0)
195 return 0; 314 return 0;
@@ -200,7 +319,8 @@ static u64 balloc_get_order(struct gk20a_allocator *a, u64 len)
200 return fls(len); 319 return fls(len);
201} 320}
202 321
203static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) 322static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a,
323 u64 start, u64 end)
204{ 324{
205 u64 size = (end - start) >> a->blk_shift; 325 u64 size = (end - start) >> a->blk_shift;
206 326
@@ -213,7 +333,7 @@ static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end)
213/* 333/*
214 * Initialize the buddy lists. 334 * Initialize the buddy lists.
215 */ 335 */
216static int balloc_init_lists(struct gk20a_allocator *a) 336static int balloc_init_lists(struct gk20a_buddy_allocator *a)
217{ 337{
218 int i; 338 int i;
219 u64 bstart, bend, order; 339 u64 bstart, bend, order;
@@ -253,6 +373,26 @@ cleanup:
253} 373}
254 374
255/* 375/*
376 * Handle the common init stuff for a gk20a_allocator.
377 */
378static int __gk20a_alloc_common_init(struct gk20a_allocator *a,
379 const char *name, void *priv,
380 const struct gk20a_allocator_ops *ops)
381{
382 if (!ops)
383 return -EINVAL;
384
385 a->ops = ops;
386 a->priv = priv;
387
388 mutex_init(&a->lock);
389
390 strlcpy(a->name, name, sizeof(a->name));
391
392 return 0;
393}
394
395/*
256 * Initialize a buddy allocator. Returns 0 on success. This allocator does 396 * Initialize a buddy allocator. Returns 0 on success. This allocator does
257 * not necessarily manage bytes. It manages distinct ranges of resources. This 397 * not necessarily manage bytes. It manages distinct ranges of resources. This
258 * allows the allocator to work for things like comp_tags, semaphores, etc. 398 * allows the allocator to work for things like comp_tags, semaphores, etc.
@@ -270,20 +410,40 @@ cleanup:
270 * will try and pick a reasonable max order. 410 * will try and pick a reasonable max order.
271 * @flags: Extra flags necessary. See GPU_BALLOC_*. 411 * @flags: Extra flags necessary. See GPU_BALLOC_*.
272 */ 412 */
273int __gk20a_allocator_init(struct gk20a_allocator *a, 413int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
274 struct vm_gk20a *vm, const char *name, 414 struct vm_gk20a *vm, const char *name,
275 u64 base, u64 size, u64 blk_size, u64 max_order, 415 u64 base, u64 size, u64 blk_size,
276 u64 flags) 416 u64 max_order, u64 flags)
277{ 417{
278 int err; 418 int err;
419 struct gk20a_buddy_allocator *a;
420
421 /* blk_size must be greater than 0 and a power of 2. */
422 if (blk_size == 0)
423 return -EINVAL;
424 if (blk_size & (blk_size - 1))
425 return -EINVAL;
279 426
280 memset(a, 0, sizeof(struct gk20a_allocator)); 427 if (max_order > GPU_BALLOC_MAX_ORDER)
281 strncpy(a->name, name, 32); 428 return -EINVAL;
429
430 /* If this is to manage a GVA space we need a VM. */
431 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
432 return -EINVAL;
433
434 a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
435 if (!a)
436 return -ENOMEM;
437
438 err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
439 if (err)
440 goto fail;
282 441
283 a->base = base; 442 a->base = base;
284 a->length = size; 443 a->length = size;
285 a->blk_size = blk_size; 444 a->blk_size = blk_size;
286 a->blk_shift = __ffs(blk_size); 445 a->blk_shift = __ffs(blk_size);
446 a->owner = __a;
287 447
288 /* 448 /*
289 * If base is 0 then modfy base to be the size of one block so that we 449 * If base is 0 then modfy base to be the size of one block so that we
@@ -294,19 +454,6 @@ int __gk20a_allocator_init(struct gk20a_allocator *a,
294 a->length -= a->blk_size; 454 a->length -= a->blk_size;
295 } 455 }
296 456
297 /* blk_size must be greater than 0 and a power of 2. */
298 if (blk_size == 0)
299 return -EINVAL;
300 if (blk_size & (blk_size - 1))
301 return -EINVAL;
302
303 if (max_order > GPU_BALLOC_MAX_ORDER)
304 return -EINVAL;
305
306 /* If this is to manage a GVA space we need a VM. */
307 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
308 return -EINVAL;
309
310 a->vm = vm; 457 a->vm = vm;
311 if (flags & GPU_BALLOC_GVA_SPACE) 458 if (flags & GPU_BALLOC_GVA_SPACE)
312 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); 459 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
@@ -320,49 +467,55 @@ int __gk20a_allocator_init(struct gk20a_allocator *a,
320 /* Shared buddy kmem_cache for all allocators. */ 467 /* Shared buddy kmem_cache for all allocators. */
321 if (!buddy_cache) 468 if (!buddy_cache)
322 buddy_cache = KMEM_CACHE(gk20a_buddy, 0); 469 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
323 if (!buddy_cache) 470 if (!buddy_cache) {
324 return -ENOMEM; 471 err = -ENOMEM;
472 goto fail;
473 }
325 474
326 a->alloced_buddies = RB_ROOT; 475 a->alloced_buddies = RB_ROOT;
476 a->fixed_allocs = RB_ROOT;
327 err = balloc_init_lists(a); 477 err = balloc_init_lists(a);
328 if (err) 478 if (err)
329 return err; 479 goto fail;
330
331 mutex_init(&a->lock);
332 480
333 a->init = 1; 481 a->inited = 1;
334 482
335 balloc_init_alloc_debug(a); 483 gk20a_init_alloc_debug(__a);
336 balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); 484 alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base);
337 balloc_dbg(a, " size 0x%llx\n", a->length); 485 alloc_dbg(__a, " size 0x%llx\n", a->length);
338 balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); 486 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
339 balloc_dbg(a, " max_order %llu\n", a->max_order); 487 alloc_dbg(__a, " max_order %llu\n", a->max_order);
340 balloc_dbg(a, " flags 0x%llx\n", a->flags); 488 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
341 489
342 return 0; 490 return 0;
491
492fail:
493 kfree(a);
494 return err;
343} 495}
344 496
345int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, 497int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name,
346 u64 base, u64 size, u64 blk_size) 498 u64 base, u64 size, u64 blk_size, u64 flags)
347{ 499{
348 return __gk20a_allocator_init(a, NULL, name, 500 return __gk20a_buddy_allocator_init(a, NULL, name,
349 base, size, blk_size, 0, 0); 501 base, size, blk_size, 0, 0);
350} 502}
351 503
352/* 504/*
353 * Clean up and destroy the passed allocator. 505 * Clean up and destroy the passed allocator.
354 */ 506 */
355void gk20a_allocator_destroy(struct gk20a_allocator *a) 507static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a)
356{ 508{
509 int i;
357 struct rb_node *node; 510 struct rb_node *node;
358 struct gk20a_buddy *bud; 511 struct gk20a_buddy *bud;
359 struct gk20a_fixed_alloc *falloc; 512 struct gk20a_fixed_alloc *falloc;
360 int i; 513 struct gk20a_buddy_allocator *a = __a->priv;
361 514
362 balloc_lock(a); 515 alloc_lock(__a);
363 516
364 if (!IS_ERR_OR_NULL(a->debugfs_entry)) 517 if (!IS_ERR_OR_NULL(__a->debugfs_entry))
365 debugfs_remove(a->debugfs_entry); 518 debugfs_remove(__a->debugfs_entry);
366 519
367 /* 520 /*
368 * Free the fixed allocs first. 521 * Free the fixed allocs first.
@@ -415,16 +568,9 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a)
415 } 568 }
416 } 569 }
417 570
418 a->init = 0; 571 kfree(a);
419 572
420 balloc_unlock(a); 573 alloc_unlock(__a);
421
422 /*
423 * We cant unlock an allocator after memsetting it. That wipes the
424 * state of the mutex. Hopefully no one uses the allocator after
425 * destroying it...
426 */
427 memset(a, 0, sizeof(struct gk20a_allocator));
428} 574}
429 575
430/* 576/*
@@ -433,7 +579,8 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a)
433 * 579 *
434 * @a must be locked. 580 * @a must be locked.
435 */ 581 */
436static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) 582static void balloc_coalesce(struct gk20a_buddy_allocator *a,
583 struct gk20a_buddy *b)
437{ 584{
438 struct gk20a_buddy *parent; 585 struct gk20a_buddy *parent;
439 586
@@ -473,8 +620,8 @@ static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b)
473 * 620 *
474 * @a must be locked. 621 * @a must be locked.
475 */ 622 */
476static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, 623static int balloc_split_buddy(struct gk20a_buddy_allocator *a,
477 int pte_size) 624 struct gk20a_buddy *b, int pte_size)
478{ 625{
479 struct gk20a_buddy *left, *right; 626 struct gk20a_buddy *left, *right;
480 u64 half; 627 u64 half;
@@ -521,7 +668,8 @@ static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b,
521 * 668 *
522 * @a must be locked. 669 * @a must be locked.
523 */ 670 */
524static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) 671static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a,
672 struct gk20a_buddy *b)
525{ 673{
526 struct rb_node **new = &(a->alloced_buddies.rb_node); 674 struct rb_node **new = &(a->alloced_buddies.rb_node);
527 struct rb_node *parent = NULL; 675 struct rb_node *parent = NULL;
@@ -552,7 +700,7 @@ static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b)
552 * 700 *
553 * @a must be locked. 701 * @a must be locked.
554 */ 702 */
555static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, 703static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a,
556 u64 addr) 704 u64 addr)
557{ 705{
558 struct rb_node *node = a->alloced_buddies.rb_node; 706 struct rb_node *node = a->alloced_buddies.rb_node;
@@ -582,7 +730,7 @@ static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
582/* 730/*
583 * Find a suitable buddy for the given order and PTE type (big or little). 731 * Find a suitable buddy for the given order and PTE type (big or little).
584 */ 732 */
585static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, 733static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a,
586 u64 order, int pte_size) 734 u64 order, int pte_size)
587{ 735{
588 struct gk20a_buddy *bud; 736 struct gk20a_buddy *bud;
@@ -615,7 +763,8 @@ static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a,
615 * 763 *
616 * @a must be locked. 764 * @a must be locked.
617 */ 765 */
618static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) 766static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a,
767 u64 order, int pte_size)
619{ 768{
620 u64 split_order; 769 u64 split_order;
621 struct gk20a_buddy *bud = NULL; 770 struct gk20a_buddy *bud = NULL;
@@ -644,21 +793,22 @@ static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size)
644/* 793/*
645 * Allocate memory from the passed allocator. 794 * Allocate memory from the passed allocator.
646 */ 795 */
647u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) 796static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len)
648{ 797{
649 u64 order, addr; 798 u64 order, addr;
650 int pte_size; 799 int pte_size;
800 struct gk20a_buddy_allocator *a = __a->priv;
651 801
652 balloc_trace_func(); 802 gk20a_alloc_trace_func();
653 803
654 balloc_lock(a); 804 alloc_lock(__a);
655 805
656 order = balloc_get_order(a, len); 806 order = balloc_get_order(a, len);
657 807
658 if (order > a->max_order) { 808 if (order > a->max_order) {
659 balloc_unlock(a); 809 alloc_unlock(__a);
660 balloc_dbg(a, "Alloc fail\n"); 810 alloc_dbg(balloc_owner(a), "Alloc fail\n");
661 balloc_trace_func_done(); 811 gk20a_alloc_trace_func_done();
662 return 0; 812 return 0;
663 } 813 }
664 814
@@ -681,18 +831,19 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len)
681 if (addr) { 831 if (addr) {
682 a->bytes_alloced += len; 832 a->bytes_alloced += len;
683 a->bytes_alloced_real += balloc_order_to_len(a, order); 833 a->bytes_alloced_real += balloc_order_to_len(a, order);
684 balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", 834 alloc_dbg(balloc_owner(a),
685 addr, order, len, 835 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
836 addr, order, len,
686 pte_size == gmmu_page_size_big ? "big" : 837 pte_size == gmmu_page_size_big ? "big" :
687 pte_size == gmmu_page_size_small ? "small" : 838 pte_size == gmmu_page_size_small ? "small" :
688 "NA/any"); 839 "NA/any");
689 } else { 840 } else {
690 balloc_dbg(a, "Alloc failed: no mem!\n"); 841 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
691 } 842 }
692 843
693 balloc_unlock(a); 844 alloc_unlock(__a);
694 845
695 balloc_trace_func_done(); 846 gk20a_alloc_trace_func_done();
696 return addr; 847 return addr;
697} 848}
698 849
@@ -703,7 +854,8 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len)
703 * TODO: Right now this uses the unoptimal approach of going through all 854 * TODO: Right now this uses the unoptimal approach of going through all
704 * outstanding allocations and checking their base/ends. This could be better. 855 * outstanding allocations and checking their base/ends. This could be better.
705 */ 856 */
706static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) 857static int balloc_is_range_free(struct gk20a_buddy_allocator *a,
858 u64 base, u64 end)
707{ 859{
708 struct rb_node *node; 860 struct rb_node *node;
709 struct gk20a_buddy *bud; 861 struct gk20a_buddy *bud;
@@ -728,7 +880,7 @@ static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end)
728 return 1; 880 return 1;
729} 881}
730 882
731static void balloc_alloc_fixed(struct gk20a_allocator *a, 883static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a,
732 struct gk20a_fixed_alloc *f) 884 struct gk20a_fixed_alloc *f)
733{ 885{
734 struct rb_node **new = &(a->fixed_allocs.rb_node); 886 struct rb_node **new = &(a->fixed_allocs.rb_node);
@@ -758,8 +910,8 @@ static void balloc_alloc_fixed(struct gk20a_allocator *a,
758 * 910 *
759 * @a must be locked. 911 * @a must be locked.
760 */ 912 */
761static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, 913static struct gk20a_fixed_alloc *balloc_free_fixed(
762 u64 addr) 914 struct gk20a_buddy_allocator *a, u64 addr)
763{ 915{
764 struct rb_node *node = a->fixed_allocs.rb_node; 916 struct rb_node *node = a->fixed_allocs.rb_node;
765 struct gk20a_fixed_alloc *falloc; 917 struct gk20a_fixed_alloc *falloc;
@@ -788,7 +940,7 @@ static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a,
788 * Find the parent range - doesn't necessarily need the parent to actually exist 940 * Find the parent range - doesn't necessarily need the parent to actually exist
789 * as a buddy. Finding an existing parent comes later... 941 * as a buddy. Finding an existing parent comes later...
790 */ 942 */
791static void __balloc_get_parent_range(struct gk20a_allocator *a, 943static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a,
792 u64 base, u64 order, 944 u64 base, u64 order,
793 u64 *pbase, u64 *porder) 945 u64 *pbase, u64 *porder)
794{ 946{
@@ -808,8 +960,8 @@ static void __balloc_get_parent_range(struct gk20a_allocator *a,
808 * Makes a buddy at the passed address. This will make all parent buddies 960 * Makes a buddy at the passed address. This will make all parent buddies
809 * necessary for this buddy to exist as well. 961 * necessary for this buddy to exist as well.
810 */ 962 */
811static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, 963static struct gk20a_buddy *__balloc_make_fixed_buddy(
812 u64 base, u64 order) 964 struct gk20a_buddy_allocator *a, u64 base, u64 order)
813{ 965{
814 struct gk20a_buddy *bud = NULL; 966 struct gk20a_buddy *bud = NULL;
815 struct list_head *order_list; 967 struct list_head *order_list;
@@ -843,7 +995,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a,
843 } 995 }
844 996
845 if (cur_order > a->max_order) { 997 if (cur_order > a->max_order) {
846 balloc_dbg(a, "No buddy for range ???\n"); 998 alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
847 return NULL; 999 return NULL;
848 } 1000 }
849 1001
@@ -864,7 +1016,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a,
864 return bud; 1016 return bud;
865} 1017}
866 1018
867static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, 1019static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a,
868 struct gk20a_fixed_alloc *falloc, 1020 struct gk20a_fixed_alloc *falloc,
869 u64 base, u64 len) 1021 u64 base, u64 len)
870{ 1022{
@@ -880,7 +1032,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a,
880 __fls(len >> a->blk_shift)); 1032 __fls(len >> a->blk_shift));
881 1033
882 if (align_order > a->max_order) { 1034 if (align_order > a->max_order) {
883 balloc_dbg(a, "Align order too big: %llu > %llu\n", 1035 alloc_dbg(balloc_owner(a),
1036 "Align order too big: %llu > %llu\n",
884 align_order, a->max_order); 1037 align_order, a->max_order);
885 return 0; 1038 return 0;
886 } 1039 }
@@ -898,7 +1051,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a,
898 balloc_base_unshift(a, inc_base), 1051 balloc_base_unshift(a, inc_base),
899 align_order); 1052 align_order);
900 if (!bud) { 1053 if (!bud) {
901 balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", 1054 alloc_dbg(balloc_owner(a),
1055 "Fixed buddy failed: {0x%llx, %llu}!\n",
902 balloc_base_unshift(a, inc_base), 1056 balloc_base_unshift(a, inc_base),
903 align_order); 1057 align_order);
904 goto err_and_cleanup; 1058 goto err_and_cleanup;
@@ -943,13 +1097,15 @@ err_and_cleanup:
943 * 1097 *
944 * Please do not use this function unless _absolutely_ necessary. 1098 * Please do not use this function unless _absolutely_ necessary.
945 */ 1099 */
946u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) 1100static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a,
1101 u64 base, u64 len)
947{ 1102{
948 struct gk20a_fixed_alloc *falloc = NULL;
949 struct gk20a_buddy *bud;
950 u64 ret, real_bytes = 0; 1103 u64 ret, real_bytes = 0;
1104 struct gk20a_buddy *bud;
1105 struct gk20a_fixed_alloc *falloc = NULL;
1106 struct gk20a_buddy_allocator *a = __a->priv;
951 1107
952 balloc_trace_func(); 1108 gk20a_alloc_trace_func();
953 1109
954 /* If base isn't aligned to an order 0 block, fail. */ 1110 /* If base isn't aligned to an order 0 block, fail. */
955 if (base & (a->blk_size - 1)) 1111 if (base & (a->blk_size - 1))
@@ -966,16 +1122,18 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
966 falloc->start = base; 1122 falloc->start = base;
967 falloc->end = base + len; 1123 falloc->end = base + len;
968 1124
969 balloc_lock(a); 1125 alloc_lock(__a);
970 if (!balloc_is_range_free(a, base, base + len)) { 1126 if (!balloc_is_range_free(a, base, base + len)) {
971 balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", 1127 alloc_dbg(balloc_owner(a),
1128 "Range not free: 0x%llx -> 0x%llx\n",
972 base, base + len); 1129 base, base + len);
973 goto fail_unlock; 1130 goto fail_unlock;
974 } 1131 }
975 1132
976 ret = __balloc_do_alloc_fixed(a, falloc, base, len); 1133 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
977 if (!ret) { 1134 if (!ret) {
978 balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", 1135 alloc_dbg(balloc_owner(a),
1136 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
979 base, base + len); 1137 base, base + len);
980 goto fail_unlock; 1138 goto fail_unlock;
981 } 1139 }
@@ -988,21 +1146,21 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
988 a->bytes_alloced += len; 1146 a->bytes_alloced += len;
989 a->bytes_alloced_real += real_bytes; 1147 a->bytes_alloced_real += real_bytes;
990 1148
991 balloc_unlock(a); 1149 alloc_unlock(__a);
992 balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); 1150 alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
993 1151
994 balloc_trace_func_done(); 1152 gk20a_alloc_trace_func_done();
995 return base; 1153 return base;
996 1154
997fail_unlock: 1155fail_unlock:
998 balloc_unlock(a); 1156 alloc_unlock(__a);
999fail: 1157fail:
1000 kfree(falloc); 1158 kfree(falloc);
1001 balloc_trace_func_done(); 1159 gk20a_alloc_trace_func_done();
1002 return 0; 1160 return 0;
1003} 1161}
1004 1162
1005static void __balloc_do_free_fixed(struct gk20a_allocator *a, 1163static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
1006 struct gk20a_fixed_alloc *falloc) 1164 struct gk20a_fixed_alloc *falloc)
1007{ 1165{
1008 struct gk20a_buddy *bud; 1166 struct gk20a_buddy *bud;
@@ -1029,19 +1187,20 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a,
1029/* 1187/*
1030 * Free the passed allocation. 1188 * Free the passed allocation.
1031 */ 1189 */
1032void gk20a_bfree(struct gk20a_allocator *a, u64 addr) 1190static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr)
1033{ 1191{
1034 struct gk20a_buddy *bud; 1192 struct gk20a_buddy *bud;
1035 struct gk20a_fixed_alloc *falloc; 1193 struct gk20a_fixed_alloc *falloc;
1194 struct gk20a_buddy_allocator *a = __a->priv;
1036 1195
1037 balloc_trace_func(); 1196 gk20a_alloc_trace_func();
1038 1197
1039 if (!addr) { 1198 if (!addr) {
1040 balloc_trace_func_done(); 1199 gk20a_alloc_trace_func_done();
1041 return; 1200 return;
1042 } 1201 }
1043 1202
1044 balloc_lock(a); 1203 alloc_lock(__a);
1045 1204
1046 /* 1205 /*
1047 * First see if this is a fixed alloc. If not fall back to a regular 1206 * First see if this is a fixed alloc. If not fall back to a regular
@@ -1066,9 +1225,9 @@ void gk20a_bfree(struct gk20a_allocator *a, u64 addr)
1066 balloc_coalesce(a, bud); 1225 balloc_coalesce(a, bud);
1067 1226
1068done: 1227done:
1069 balloc_unlock(a); 1228 alloc_unlock(__a);
1070 balloc_dbg(a, "Free 0x%llx\n", addr); 1229 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
1071 balloc_trace_func_done(); 1230 gk20a_alloc_trace_func_done();
1072 return; 1231 return;
1073} 1232}
1074 1233
@@ -1077,49 +1236,42 @@ done:
1077 * stats are printed to the kernel log. This lets this code be used for 1236 * stats are printed to the kernel log. This lets this code be used for
1078 * debugging purposes internal to the allocator. 1237 * debugging purposes internal to the allocator.
1079 */ 1238 */
1080static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, 1239static void gk20a_buddy_print_stats(struct gk20a_allocator *__a,
1081 int lock) 1240 struct seq_file *s, int lock)
1082{ 1241{
1083#define __balloc_pstat(s, fmt, arg...) \
1084 do { \
1085 if (s) \
1086 seq_printf(s, fmt, ##arg); \
1087 else \
1088 balloc_dbg(a, fmt, ##arg); \
1089 } while (0)
1090
1091 int i; 1242 int i;
1092 struct rb_node *node; 1243 struct rb_node *node;
1093 struct gk20a_fixed_alloc *falloc; 1244 struct gk20a_fixed_alloc *falloc;
1245 struct gk20a_buddy_allocator *a = __a->priv;
1094 1246
1095 __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", 1247 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1096 a->base, a->length, a->blk_size); 1248 a->base, a->length, a->blk_size);
1097 __balloc_pstat(s, "Internal params:\n"); 1249 __alloc_pstat(s, __a, "Internal params:\n");
1098 __balloc_pstat(s, " start = 0x%llx\n", a->start); 1250 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1099 __balloc_pstat(s, " end = 0x%llx\n", a->end); 1251 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1100 __balloc_pstat(s, " count = 0x%llx\n", a->count); 1252 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1101 __balloc_pstat(s, " blks = 0x%llx\n", a->blks); 1253 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1102 __balloc_pstat(s, " max_order = %llu\n", a->max_order); 1254 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1103 1255
1104 __balloc_pstat(s, "Buddy blocks:\n"); 1256 __alloc_pstat(s, __a, "Buddy blocks:\n");
1105 __balloc_pstat(s, " Order Free Alloced Split\n"); 1257 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1106 __balloc_pstat(s, " ----- ---- ------- -----\n"); 1258 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1107 1259
1108 if (lock) 1260 if (lock)
1109 balloc_lock(a); 1261 alloc_lock(__a);
1110 for (i = a->max_order; i >= 0; i--) { 1262 for (i = a->max_order; i >= 0; i--) {
1111 if (a->buddy_list_len[i] == 0 && 1263 if (a->buddy_list_len[i] == 0 &&
1112 a->buddy_list_alloced[i] == 0 && 1264 a->buddy_list_alloced[i] == 0 &&
1113 a->buddy_list_split[i] == 0) 1265 a->buddy_list_split[i] == 0)
1114 continue; 1266 continue;
1115 1267
1116 __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, 1268 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1117 a->buddy_list_len[i], 1269 a->buddy_list_len[i],
1118 a->buddy_list_alloced[i], 1270 a->buddy_list_alloced[i],
1119 a->buddy_list_split[i]); 1271 a->buddy_list_split[i]);
1120 } 1272 }
1121 1273
1122 __balloc_pstat(s, "\n"); 1274 __alloc_pstat(s, __a, "\n");
1123 1275
1124 for (node = rb_first(&a->fixed_allocs), i = 1; 1276 for (node = rb_first(&a->fixed_allocs), i = 1;
1125 node != NULL; 1277 node != NULL;
@@ -1127,27 +1279,33 @@ static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
1127 falloc = container_of(node, 1279 falloc = container_of(node,
1128 struct gk20a_fixed_alloc, alloced_entry); 1280 struct gk20a_fixed_alloc, alloced_entry);
1129 1281
1130 __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", 1282 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1131 i, falloc->start, falloc->end); 1283 i, falloc->start, falloc->end);
1132 } 1284 }
1133 1285
1134 __balloc_pstat(s, "\n"); 1286 __alloc_pstat(s, __a, "\n");
1135 __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); 1287 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1136 __balloc_pstat(s, "Bytes allocated (real): %llu\n", 1288 a->bytes_alloced);
1137 a->bytes_alloced_real); 1289 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1138 __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); 1290 a->bytes_alloced_real);
1291 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1292 a->bytes_freed);
1139 1293
1140 if (lock) 1294 if (lock)
1141 balloc_unlock(a); 1295 alloc_unlock(__a);
1296}
1142 1297
1143#undef __balloc_pstats 1298void gk20a_alloc_print_stats(struct gk20a_allocator *__a,
1299 struct seq_file *s, int lock)
1300{
1301 __a->ops->print_stats(__a, s, lock);
1144} 1302}
1145 1303
1146static int __alloc_show(struct seq_file *s, void *unused) 1304static int __alloc_show(struct seq_file *s, void *unused)
1147{ 1305{
1148 struct gk20a_allocator *a = s->private; 1306 struct gk20a_allocator *a = s->private;
1149 1307
1150 balloc_print_stats(a, s, 1); 1308 gk20a_alloc_print_stats(a, s, 1);
1151 1309
1152 return 0; 1310 return 0;
1153} 1311}
@@ -1164,13 +1322,13 @@ static const struct file_operations __alloc_fops = {
1164 .release = single_release, 1322 .release = single_release,
1165}; 1323};
1166 1324
1167static void balloc_init_alloc_debug(struct gk20a_allocator *a) 1325static void gk20a_init_alloc_debug(struct gk20a_allocator *a)
1168{ 1326{
1169 if (!balloc_debugfs_root) 1327 if (!gk20a_alloc_debugfs_root)
1170 return; 1328 return;
1171 1329
1172 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, 1330 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
1173 balloc_debugfs_root, 1331 gk20a_alloc_debugfs_root,
1174 a, &__alloc_fops); 1332 a, &__alloc_fops);
1175} 1333}
1176 1334
@@ -1180,11 +1338,11 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1180 struct gk20a_platform *platform = platform_get_drvdata(pdev); 1338 struct gk20a_platform *platform = platform_get_drvdata(pdev);
1181 struct dentry *gpu_root = platform->debugfs; 1339 struct dentry *gpu_root = platform->debugfs;
1182 1340
1183 balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); 1341 gk20a_alloc_debugfs_root = debugfs_create_dir("allocators", gpu_root);
1184 if (IS_ERR_OR_NULL(balloc_debugfs_root)) 1342 if (IS_ERR_OR_NULL(gk20a_alloc_debugfs_root))
1185 return; 1343 return;
1186 1344
1187 debugfs_create_u32("tracing", 0664, balloc_debugfs_root, 1345 debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root,
1188 &balloc_tracing_on); 1346 &gk20a_alloc_tracing_on);
1189} 1347}
1190#endif 1348#endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index e86e053b..74e23e6c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -20,10 +20,49 @@
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <linux/debugfs.h> 22#include <linux/debugfs.h>
23#include <linux/seq_file.h>
23#include <linux/platform_device.h> 24#include <linux/platform_device.h>
24 25
25/* #define ALLOCATOR_DEBUG */ 26/* #define ALLOCATOR_DEBUG */
26 27
28struct gk20a_allocator;
29struct vm_gk20a;
30
31/*
32 * Operations for an allocator to implement.
33 */
34struct gk20a_allocator_ops {
35 u64 (*alloc)(struct gk20a_allocator *allocator, u64 len);
36 void (*free)(struct gk20a_allocator *allocator, u64 addr);
37
38 /*
39 * Special interface to allocate a memory region with a specific
40 * starting address. Yikes. Note: if free() works for freeing both
41 * regular and fixed allocations then free_fixed() does not need to
42 * be implemented. This behavior exists for legacy reasons and should
43 * not be propagated to new allocators.
44 */
45 u64 (*alloc_fixed)(struct gk20a_allocator *allocator,
46 u64 base, u64 len);
47 void (*free_fixed)(struct gk20a_allocator *allocator,
48 u64 base, u64 len);
49
50 /*
51 * Returns info about the allocator.
52 */
53 u64 (*base)(struct gk20a_allocator *allocator);
54 u64 (*length)(struct gk20a_allocator *allocator);
55 u64 (*end)(struct gk20a_allocator *allocator);
56 int (*inited)(struct gk20a_allocator *allocator);
57
58 /* Destructor. */
59 void (*fini)(struct gk20a_allocator *allocator);
60
61 /* Debugging. */
62 void (*print_stats)(struct gk20a_allocator *allocator,
63 struct seq_file *s, int lock);
64};
65
27/* 66/*
28 * Each buddy is an element in a binary tree. 67 * Each buddy is an element in a binary tree.
29 */ 68 */
@@ -97,8 +136,6 @@ struct gk20a_fixed_alloc {
97 u64 end; /* End address. */ 136 u64 end; /* End address. */
98}; 137};
99 138
100struct vm_gk20a;
101
102/* 139/*
103 * GPU buddy allocator for the various GPU address spaces. Each addressable unit 140 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
104 * doesn't have to correspond to a byte. In some cases each unit is a more 141 * doesn't have to correspond to a byte. In some cases each unit is a more
@@ -109,12 +146,10 @@ struct vm_gk20a;
109 * 146 *
110 * order_size is the size of an order 0 buddy. 147 * order_size is the size of an order 0 buddy.
111 */ 148 */
112struct gk20a_allocator { 149struct gk20a_buddy_allocator {
113 150 struct gk20a_allocator *owner; /* Owner of this buddy allocator. */
114 struct vm_gk20a *vm; /* Parent VM - can be NULL. */ 151 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
115 152
116 char name[32]; /* Name of allocator. */
117
118 u64 base; /* Base address of the space. */ 153 u64 base; /* Base address of the space. */
119 u64 length; /* Length of the space. */ 154 u64 length; /* Length of the space. */
120 u64 blk_size; /* Size of order 0 allocation. */ 155 u64 blk_size; /* Size of order 0 allocation. */
@@ -132,11 +167,6 @@ struct gk20a_allocator {
132 struct rb_root alloced_buddies; /* Outstanding allocations. */ 167 struct rb_root alloced_buddies; /* Outstanding allocations. */
133 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ 168 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
134 169
135 struct mutex lock; /* Protects buddy access. */
136
137#define GPU_BALLOC_GVA_SPACE 0x1
138 u64 flags;
139
140 /* 170 /*
141 * Impose an upper bound on the maximum order. 171 * Impose an upper bound on the maximum order.
142 */ 172 */
@@ -155,52 +185,121 @@ struct gk20a_allocator {
155 */ 185 */
156 u64 pte_blk_order; 186 u64 pte_blk_order;
157 187
158 struct dentry *debugfs_entry; 188 int inited;
189
190#define GPU_BALLOC_GVA_SPACE 0x1
191 u64 flags;
159 192
160 u64 bytes_alloced; 193 u64 bytes_alloced;
161 u64 bytes_alloced_real; 194 u64 bytes_alloced_real;
162 u64 bytes_freed; 195 u64 bytes_freed;
163}; 196};
164 197
165#define balloc_lock(a) mutex_lock(&(a)->lock) 198struct gk20a_allocator {
166#define balloc_unlock(a) mutex_unlock(&(a)->lock) 199 char name[32];
200 struct mutex lock;
167 201
168#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)]) 202 void *priv;
169#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size) 203 const struct gk20a_allocator_ops *ops;
170#define balloc_base_shift(a, base) ((base) - (a)->start)
171#define balloc_base_unshift(a, base) ((base) + (a)->start)
172 204
173int gk20a_allocator_init(struct gk20a_allocator *allocator, 205 struct dentry *debugfs_entry;
174 const char *name, u64 base, u64 size, u64 order0); 206};
175int __gk20a_allocator_init(struct gk20a_allocator *allocator, 207
176 struct vm_gk20a *vm, const char *name, 208static inline void alloc_lock(struct gk20a_allocator *a)
177 u64 base, u64 size, u64 order0, 209{
178 u64 max_order, u64 flags); 210 mutex_lock(&a->lock);
179void gk20a_allocator_destroy(struct gk20a_allocator *allocator); 211}
212
213static inline void alloc_unlock(struct gk20a_allocator *a)
214{
215 mutex_unlock(&a->lock);
216}
217
218static inline struct gk20a_buddy_allocator *buddy_allocator(
219 struct gk20a_allocator *a)
220{
221 return (struct gk20a_buddy_allocator *)a->priv;
222}
223
224static inline struct list_head *balloc_get_order_list(
225 struct gk20a_buddy_allocator *a, int order)
226{
227 return &a->buddy_list[order];
228}
229
230static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a,
231 int order)
232{
233 return (1 << order) * a->blk_size;
234}
235
236static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a,
237 u64 base)
238{
239 return base - a->start;
240}
241
242static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a,
243 u64 base)
244{
245 return base + a->start;
246}
247
248static inline struct gk20a_allocator *balloc_owner(
249 struct gk20a_buddy_allocator *a)
250{
251 return a->owner;
252}
180 253
181/* 254/*
182 * Normal alloc/free operations for the buddy allocator. 255 * Buddy allocator specific initializers.
183 */ 256 */
184u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len); 257int __gk20a_buddy_allocator_init(struct gk20a_allocator *a,
185void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr); 258 struct vm_gk20a *vm, const char *name,
259 u64 base, u64 size, u64 blk_size,
260 u64 max_order, u64 flags);
261int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator,
262 const char *name, u64 base, u64 size,
263 u64 blk_size, u64 flags);
186 264
187/* 265/*
188 * Special interface to allocate a memory regions with a specific starting 266 * Allocator APIs.
189 * address. Yikes.
190 */ 267 */
191u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); 268u64 gk20a_alloc(struct gk20a_allocator *allocator, u64 len);
269void gk20a_free(struct gk20a_allocator *allocator, u64 addr);
270
271u64 gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
272void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
273
274u64 gk20a_alloc_base(struct gk20a_allocator *a);
275u64 gk20a_alloc_length(struct gk20a_allocator *a);
276u64 gk20a_alloc_end(struct gk20a_allocator *a);
277u64 gk20a_alloc_initialized(struct gk20a_allocator *a);
278
279void gk20a_alloc_destroy(struct gk20a_allocator *allocator);
280
281void gk20a_alloc_print_stats(struct gk20a_allocator *a,
282 struct seq_file *s, int lock);
192 283
193/* 284/*
194 * Debugfs init. 285 * Debug stuff.
195 */ 286 */
196void gk20a_alloc_debugfs_init(struct platform_device *pdev); 287void gk20a_alloc_debugfs_init(struct platform_device *pdev);
197 288
289#define __alloc_pstat(seq, allocator, fmt, arg...) \
290 do { \
291 if (s) \
292 seq_printf(seq, fmt, ##arg); \
293 else \
294 alloc_dbg(allocator, fmt, ##arg); \
295 } while (0)
296
198#if defined(ALLOCATOR_DEBUG) 297#if defined(ALLOCATOR_DEBUG)
199#define balloc_dbg(alloctor, format, arg...) \ 298#define alloc_dbg(allocator, format, arg...) \
200 pr_info("%-25s %25s() " format, \ 299 pr_info("%-25s %25s() " format, \
201 alloctor->name, __func__, ##arg) 300 allocator->name, __func__, ##arg)
202#else 301#else
203#define balloc_dbg(allocator, format, arg...) 302#define alloc_dbg(allocator, format, arg...)
204#endif 303#endif
205 304
206#endif /* GK20A_ALLOCATOR_H */ 305#endif /* GK20A_ALLOCATOR_H */
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 673aafda..ffc695f5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1331,7 +1331,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
1331 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, 1331 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
1332 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); 1332 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
1333 1333
1334 offset = gk20a_balloc(vma, size); 1334 offset = gk20a_alloc(vma, size);
1335 if (!offset) { 1335 if (!offset) {
1336 gk20a_err(dev_from_vm(vm), 1336 gk20a_err(dev_from_vm(vm),
1337 "%s oom: sz=0x%llx", vma->name, size); 1337 "%s oom: sz=0x%llx", vma->name, size);
@@ -1350,7 +1350,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
1350 1350
1351 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", 1351 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
1352 vma->name, offset, size); 1352 vma->name, offset, size);
1353 gk20a_bfree(vma, offset); 1353 gk20a_free(vma, offset);
1354 1354
1355 return 0; 1355 return 0;
1356} 1356}
@@ -3407,12 +3407,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
3407 * 3407 *
3408 * !!! TODO: cleanup. 3408 * !!! TODO: cleanup.
3409 */ 3409 */
3410 sema_sea->gpu_va = gk20a_balloc_fixed(&vm->vma[gmmu_page_size_kernel], 3410 sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
3411 vm->va_limit - 3411 vm->va_limit -
3412 mm->channel.kernel_size, 3412 mm->channel.kernel_size,
3413 512 * PAGE_SIZE); 3413 512 * PAGE_SIZE);
3414 if (!sema_sea->gpu_va) { 3414 if (!sema_sea->gpu_va) {
3415 gk20a_bfree(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); 3415 gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
3416 gk20a_vm_put(vm); 3416 gk20a_vm_put(vm);
3417 return -ENOMEM; 3417 return -ENOMEM;
3418 } 3418 }
@@ -3420,7 +3420,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
3420 err = gk20a_semaphore_pool_map(vm->sema_pool, vm); 3420 err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
3421 if (err) { 3421 if (err) {
3422 gk20a_semaphore_pool_unmap(vm->sema_pool, vm); 3422 gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
3423 gk20a_bfree(&vm->vma[gmmu_page_size_small], 3423 gk20a_free(&vm->vma[gmmu_page_size_small],
3424 vm->sema_pool->gpu_va); 3424 vm->sema_pool->gpu_va);
3425 gk20a_vm_put(vm); 3425 gk20a_vm_put(vm);
3426 } 3426 }
@@ -3542,13 +3542,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
3542 snprintf(alloc_name, sizeof(alloc_name), 3542 snprintf(alloc_name, sizeof(alloc_name),
3543 "gk20a_%s-fixed", name); 3543 "gk20a_%s-fixed", name);
3544 3544
3545 err = __gk20a_allocator_init(&vm->fixed, 3545 err = __gk20a_buddy_allocator_init(&vm->fixed,
3546 vm, alloc_name, 3546 vm, alloc_name,
3547 small_vma_start, 3547 small_vma_start,
3548 g->separate_fixed_allocs, 3548 g->separate_fixed_allocs,
3549 SZ_4K, 3549 SZ_4K,
3550 GPU_BALLOC_MAX_ORDER, 3550 GPU_BALLOC_MAX_ORDER,
3551 GPU_BALLOC_GVA_SPACE); 3551 GPU_BALLOC_GVA_SPACE);
3552 if (err) 3552 if (err)
3553 goto clean_up_ptes; 3553 goto clean_up_ptes;
3554 3554
@@ -3559,13 +3559,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
3559 if (small_vma_start < small_vma_limit) { 3559 if (small_vma_start < small_vma_limit) {
3560 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, 3560 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
3561 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); 3561 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
3562 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], 3562 err = __gk20a_buddy_allocator_init(
3563 vm, alloc_name, 3563 &vm->vma[gmmu_page_size_small],
3564 small_vma_start, 3564 vm, alloc_name,
3565 small_vma_limit - small_vma_start, 3565 small_vma_start,
3566 SZ_4K, 3566 small_vma_limit - small_vma_start,
3567 GPU_BALLOC_MAX_ORDER, 3567 SZ_4K,
3568 GPU_BALLOC_GVA_SPACE); 3568 GPU_BALLOC_MAX_ORDER,
3569 GPU_BALLOC_GVA_SPACE);
3569 if (err) 3570 if (err)
3570 goto clean_up_ptes; 3571 goto clean_up_ptes;
3571 } 3572 }
@@ -3573,13 +3574,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
3573 if (large_vma_start < large_vma_limit) { 3574 if (large_vma_start < large_vma_limit) {
3574 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", 3575 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
3575 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); 3576 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
3576 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 3577 err = __gk20a_buddy_allocator_init(
3577 vm, alloc_name, 3578 &vm->vma[gmmu_page_size_big],
3578 large_vma_start, 3579 vm, alloc_name,
3579 large_vma_limit - large_vma_start, 3580 large_vma_start,
3580 big_page_size, 3581 large_vma_limit - large_vma_start,
3581 GPU_BALLOC_MAX_ORDER, 3582 big_page_size,
3582 GPU_BALLOC_GVA_SPACE); 3583 GPU_BALLOC_MAX_ORDER,
3584 GPU_BALLOC_GVA_SPACE);
3583 if (err) 3585 if (err)
3584 goto clean_up_small_allocator; 3586 goto clean_up_small_allocator;
3585 } 3587 }
@@ -3589,13 +3591,13 @@ int gk20a_init_vm(struct mm_gk20a *mm,
3589 /* 3591 /*
3590 * kernel reserved VMA is at the end of the aperture 3592 * kernel reserved VMA is at the end of the aperture
3591 */ 3593 */
3592 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], 3594 err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel],
3593 vm, alloc_name, 3595 vm, alloc_name,
3594 kernel_vma_start, 3596 kernel_vma_start,
3595 kernel_vma_limit - kernel_vma_start, 3597 kernel_vma_limit - kernel_vma_start,
3596 SZ_4K, 3598 SZ_4K,
3597 GPU_BALLOC_MAX_ORDER, 3599 GPU_BALLOC_MAX_ORDER,
3598 GPU_BALLOC_GVA_SPACE); 3600 GPU_BALLOC_GVA_SPACE);
3599 if (err) 3601 if (err)
3600 goto clean_up_big_allocator; 3602 goto clean_up_big_allocator;
3601 3603
@@ -3620,10 +3622,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
3620 3622
3621clean_up_big_allocator: 3623clean_up_big_allocator:
3622 if (large_vma_start < large_vma_limit) 3624 if (large_vma_start < large_vma_limit)
3623 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); 3625 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
3624clean_up_small_allocator: 3626clean_up_small_allocator:
3625 if (small_vma_start < small_vma_limit) 3627 if (small_vma_start < small_vma_limit)
3626 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 3628 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
3627clean_up_ptes: 3629clean_up_ptes:
3628 free_gmmu_pages(vm, &vm->pdb); 3630 free_gmmu_pages(vm, &vm->pdb);
3629clean_up_pdes: 3631clean_up_pdes:
@@ -3730,15 +3732,15 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
3730 3732
3731 vma = &vm->vma[pgsz_idx]; 3733 vma = &vm->vma[pgsz_idx];
3732 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { 3734 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
3733 if (vm->fixed.init) 3735 if (gk20a_alloc_initialized(&vm->fixed))
3734 vma = &vm->fixed; 3736 vma = &vm->fixed;
3735 vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, 3737 vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset,
3736 (u64)args->pages * 3738 (u64)args->pages *
3737 (u64)args->page_size); 3739 (u64)args->page_size);
3738 } else { 3740 } else {
3739 vaddr_start = gk20a_balloc(vma, 3741 vaddr_start = gk20a_alloc(vma,
3740 (u64)args->pages * 3742 (u64)args->pages *
3741 (u64)args->page_size); 3743 (u64)args->page_size);
3742 } 3744 }
3743 3745
3744 if (!vaddr_start) { 3746 if (!vaddr_start) {
@@ -3772,7 +3774,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
3772 APERTURE_INVALID); 3774 APERTURE_INVALID);
3773 if (!map_offset) { 3775 if (!map_offset) {
3774 mutex_unlock(&vm->update_gmmu_lock); 3776 mutex_unlock(&vm->update_gmmu_lock);
3775 gk20a_bfree(vma, vaddr_start); 3777 gk20a_free(vma, vaddr_start);
3776 kfree(va_node); 3778 kfree(va_node);
3777 goto clean_up; 3779 goto clean_up;
3778 } 3780 }
@@ -3807,11 +3809,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
3807 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? 3809 pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
3808 gmmu_page_size_big : gmmu_page_size_small; 3810 gmmu_page_size_big : gmmu_page_size_small;
3809 3811
3810 if (vm->fixed.init) 3812 if (gk20a_alloc_initialized(&vm->fixed))
3811 vma = &vm->fixed; 3813 vma = &vm->fixed;
3812 else 3814 else
3813 vma = &vm->vma[pgsz_idx]; 3815 vma = &vm->vma[pgsz_idx];
3814 gk20a_bfree(vma, args->offset); 3816 gk20a_free(vma, args->offset);
3815 3817
3816 mutex_lock(&vm->update_gmmu_lock); 3818 mutex_lock(&vm->update_gmmu_lock);
3817 va_node = addr_to_reservation(vm, args->offset); 3819 va_node = addr_to_reservation(vm, args->offset);
@@ -3995,13 +3997,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
3995 3997
3996void gk20a_deinit_vm(struct vm_gk20a *vm) 3998void gk20a_deinit_vm(struct vm_gk20a *vm)
3997{ 3999{
3998 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); 4000 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
3999 if (vm->vma[gmmu_page_size_big].init) 4001 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big]))
4000 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); 4002 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
4001 if (vm->vma[gmmu_page_size_small].init) 4003 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small]))
4002 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 4004 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
4003 if (vm->fixed.init) 4005 if (gk20a_alloc_initialized(&vm->fixed))
4004 gk20a_allocator_destroy(&vm->fixed); 4006 gk20a_alloc_destroy(&vm->fixed);
4005 4007
4006 gk20a_vm_free_entries(vm, &vm->pdb, 0); 4008 gk20a_vm_free_entries(vm, &vm->pdb, 0);
4007} 4009}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2e9172c7..66e46480 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -424,12 +424,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void)
424 424
425static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) 425static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
426{ 426{
427 struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big];
428
427 if (!vm->big_pages) 429 if (!vm->big_pages)
428 return 0; 430 return 0;
429 431
430 return addr >= vm->vma[gmmu_page_size_big].base && 432 return addr >= gk20a_alloc_base(a) &&
431 addr < vm->vma[gmmu_page_size_big].base + 433 addr < gk20a_alloc_base(a) + gk20a_alloc_length(a);
432 vm->vma[gmmu_page_size_big].length;
433} 434}
434 435
435/* 436/*
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 347d7158..a3898993 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2856,8 +2856,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
2856{ 2856{
2857 gk20a_dbg_fn(""); 2857 gk20a_dbg_fn("");
2858 2858
2859 if (pmu->dmem.init) 2859 if (gk20a_alloc_initialized(&pmu->dmem))
2860 gk20a_allocator_destroy(&pmu->dmem); 2860 gk20a_alloc_destroy(&pmu->dmem);
2861} 2861}
2862 2862
2863static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) 2863static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
@@ -3503,7 +3503,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
3503 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); 3503 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
3504 3504
3505 if (!pmu->sample_buffer) 3505 if (!pmu->sample_buffer)
3506 pmu->sample_buffer = gk20a_balloc(&pmu->dmem, 3506 pmu->sample_buffer = gk20a_alloc(&pmu->dmem,
3507 2 * sizeof(u16)); 3507 2 * sizeof(u16));
3508 if (!pmu->sample_buffer) { 3508 if (!pmu->sample_buffer) {
3509 gk20a_err(dev_from_gk20a(g), 3509 gk20a_err(dev_from_gk20a(g),
@@ -3605,7 +3605,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
3605 for (i = 0; i < PMU_QUEUE_COUNT; i++) 3605 for (i = 0; i < PMU_QUEUE_COUNT; i++)
3606 pmu_queue_init(pmu, i, init); 3606 pmu_queue_init(pmu, i, init);
3607 3607
3608 if (!pmu->dmem.init) { 3608 if (!gk20a_alloc_initialized(&pmu->dmem)) {
3609 /* Align start and end addresses */ 3609 /* Align start and end addresses */
3610 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), 3610 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
3611 PMU_DMEM_ALLOC_ALIGNMENT); 3611 PMU_DMEM_ALLOC_ALIGNMENT);
@@ -3613,9 +3613,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
3613 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & 3613 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
3614 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); 3614 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
3615 u32 size = end - start; 3615 u32 size = end - start;
3616 __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem", 3616 gk20a_buddy_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
3617 start, size, 3617 start, size,
3618 PMU_DMEM_ALLOC_ALIGNMENT, 4, 0); 3618 PMU_DMEM_ALLOC_ALIGNMENT, 0);
3619 } 3619 }
3620 3620
3621 pmu->pmu_ready = true; 3621 pmu->pmu_ready = true;
@@ -3752,12 +3752,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
3752 seq->callback = NULL; 3752 seq->callback = NULL;
3753 if (pv->pmu_allocation_get_dmem_size(pmu, 3753 if (pv->pmu_allocation_get_dmem_size(pmu,
3754 pv->get_pmu_seq_in_a_ptr(seq)) != 0) 3754 pv->get_pmu_seq_in_a_ptr(seq)) != 0)
3755 gk20a_bfree(&pmu->dmem, 3755 gk20a_free(&pmu->dmem,
3756 pv->pmu_allocation_get_dmem_offset(pmu, 3756 pv->pmu_allocation_get_dmem_offset(pmu,
3757 pv->get_pmu_seq_in_a_ptr(seq))); 3757 pv->get_pmu_seq_in_a_ptr(seq)));
3758 if (pv->pmu_allocation_get_dmem_size(pmu, 3758 if (pv->pmu_allocation_get_dmem_size(pmu,
3759 pv->get_pmu_seq_out_a_ptr(seq)) != 0) 3759 pv->get_pmu_seq_out_a_ptr(seq)) != 0)
3760 gk20a_bfree(&pmu->dmem, 3760 gk20a_free(&pmu->dmem,
3761 pv->pmu_allocation_get_dmem_offset(pmu, 3761 pv->pmu_allocation_get_dmem_offset(pmu,
3762 pv->get_pmu_seq_out_a_ptr(seq))); 3762 pv->get_pmu_seq_out_a_ptr(seq)));
3763 3763
@@ -4418,7 +4418,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4418 (u16)max(payload->in.size, payload->out.size)); 4418 (u16)max(payload->in.size, payload->out.size));
4419 4419
4420 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = 4420 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
4421 gk20a_balloc(&pmu->dmem, 4421 gk20a_alloc(&pmu->dmem,
4422 pv->pmu_allocation_get_dmem_size(pmu, in)); 4422 pv->pmu_allocation_get_dmem_size(pmu, in));
4423 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) 4423 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
4424 goto clean_up; 4424 goto clean_up;
@@ -4443,7 +4443,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4443 if (payload->out.buf != payload->in.buf) { 4443 if (payload->out.buf != payload->in.buf) {
4444 4444
4445 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = 4445 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
4446 gk20a_balloc(&pmu->dmem, 4446 gk20a_alloc(&pmu->dmem,
4447 pv->pmu_allocation_get_dmem_size(pmu, out)); 4447 pv->pmu_allocation_get_dmem_size(pmu, out));
4448 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, 4448 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
4449 out))) 4449 out)))
@@ -4474,10 +4474,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
4474clean_up: 4474clean_up:
4475 gk20a_dbg_fn("fail"); 4475 gk20a_dbg_fn("fail");
4476 if (in) 4476 if (in)
4477 gk20a_bfree(&pmu->dmem, 4477 gk20a_free(&pmu->dmem,
4478 pv->pmu_allocation_get_dmem_offset(pmu, in)); 4478 pv->pmu_allocation_get_dmem_offset(pmu, in));
4479 if (out) 4479 if (out)
4480 gk20a_bfree(&pmu->dmem, 4480 gk20a_free(&pmu->dmem,
4481 pv->pmu_allocation_get_dmem_offset(pmu, out)); 4481 pv->pmu_allocation_get_dmem_offset(pmu, out));
4482 4482
4483 pmu_seq_release(pmu, seq); 4483 pmu_seq_release(pmu, seq);
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 2239fcbc..c6f42703 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -230,11 +230,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
230 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 230 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
231 WARN_ON(err || msg.ret); 231 WARN_ON(err || msg.ret);
232 232
233 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); 233 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
234 if (vm->vma[gmmu_page_size_small].init) 234 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small]))
235 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 235 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
236 if (vm->vma[gmmu_page_size_big].init) 236 if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big]))
237 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); 237 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
238 238
239 mutex_unlock(&vm->update_gmmu_lock); 239 mutex_unlock(&vm->update_gmmu_lock);
240 240
@@ -374,7 +374,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
374 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 374 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
375 gmmu_page_sizes[gmmu_page_size_small] >> 10); 375 gmmu_page_sizes[gmmu_page_size_small] >> 10);
376 376
377 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], 377 err = __gk20a_buddy_allocator_init(
378 &vm->vma[gmmu_page_size_small],
378 vm, name, 379 vm, name,
379 small_vma_start, 380 small_vma_start,
380 small_vma_limit - small_vma_start, 381 small_vma_limit - small_vma_start,
@@ -388,7 +389,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
388 if (large_vma_start < large_vma_limit) { 389 if (large_vma_start < large_vma_limit) {
389 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 390 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
390 gmmu_page_sizes[gmmu_page_size_big] >> 10); 391 gmmu_page_sizes[gmmu_page_size_big] >> 10);
391 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 392 err = __gk20a_buddy_allocator_init(
393 &vm->vma[gmmu_page_size_big],
392 vm, name, 394 vm, name,
393 large_vma_start, 395 large_vma_start,
394 large_vma_limit - large_vma_start, 396 large_vma_limit - large_vma_start,
@@ -404,7 +406,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
404 /* 406 /*
405 * kernel reserved VMA is at the end of the aperture 407 * kernel reserved VMA is at the end of the aperture
406 */ 408 */
407 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], 409 err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel],
408 vm, name, 410 vm, name,
409 kernel_vma_start, 411 kernel_vma_start,
410 kernel_vma_limit - kernel_vma_start, 412 kernel_vma_limit - kernel_vma_start,
@@ -426,10 +428,10 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
426 428
427clean_up_big_allocator: 429clean_up_big_allocator:
428 if (large_vma_start < large_vma_limit) 430 if (large_vma_start < large_vma_limit)
429 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); 431 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
430clean_up_small_allocator: 432clean_up_small_allocator:
431 if (small_vma_start < small_vma_limit) 433 if (small_vma_start < small_vma_limit)
432 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); 434 gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
433clean_up_share: 435clean_up_share:
434 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; 436 msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
435 msg.handle = platform->virt_handle; 437 msg.handle = platform->virt_handle;