-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c           15
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c               2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.c  1167
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator.h   213
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c           5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c          202
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h            1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          68
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h           4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c    15
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c           5
-rw-r--r--  drivers/gpu/nvgpu/vgpu/ltc_vgpu.c             5
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c             36
13 files changed, 1385 insertions, 353 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 63569008..eb18fa65 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -199,21 +199,14 @@ static int gk20a_as_ioctl_get_va_regions(
199 199
200 for (i = 0; i < write_entries; ++i) { 200 for (i = 0; i < write_entries; ++i) {
201 struct nvgpu_as_va_region region; 201 struct nvgpu_as_va_region region;
202 u32 base, limit;
203 202
204 memset(&region, 0, sizeof(struct nvgpu_as_va_region)); 203 memset(&region, 0, sizeof(struct nvgpu_as_va_region));
205 204
206 if (!vm->vma[i].constraint.enable) {
207 base = vm->vma[i].base;
208 limit = vm->vma[i].limit;
209 } else {
210 base = vm->vma[i].constraint.base;
211 limit = vm->vma[i].constraint.limit;
212 }
213
214 region.page_size = vm->gmmu_page_sizes[i]; 205 region.page_size = vm->gmmu_page_sizes[i];
215 region.offset = (u64)base * region.page_size; 206 region.offset = vm->vma[i].base;
216 region.pages = limit - base; /* NOTE: limit is exclusive */ 207 /* No __aeabi_uldivmod() on some platforms... */
208 region.pages = (vm->vma[i].end - vm->vma[i].start) >>
209 ilog2(region.page_size);
217 210
218 if (copy_to_user(user_region_ptr + i, &region, sizeof(region))) 211 if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
219 return -EFAULT; 212 return -EFAULT;
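
The rewritten region.pages computation avoids a 64-bit division because, as the patch comment notes, some 32-bit targets lack the __aeabi_uldivmod helper; since GMMU page sizes are powers of two, a right shift by ilog2(page_size) gives the same result. A standalone userspace sketch of that equivalence (not part of the patch; the ilog2 helper and the range values below are stand-ins):

/*
 * For power-of-two page sizes, (end - start) / page_size and
 * (end - start) >> ilog2(page_size) are identical, so the shift avoids
 * pulling in the 64-bit division helper.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int ilog2_u64(uint64_t v)	/* stand-in for the kernel's ilog2() */
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	uint64_t start = 0x100000, end = 0x4000000;	/* illustrative VA range */
	uint64_t page_size = 128 << 10;			/* illustrative 128K pages */

	printf("divide: %llu\n", (unsigned long long)((end - start) / page_size));
	printf("shift:  %llu\n", (unsigned long long)((end - start) >> ilog2_u64(page_size)));
	return 0;
}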
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index f3b5544f..2e88726a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -59,6 +59,7 @@
59#include "hw_fb_gk20a.h" 59#include "hw_fb_gk20a.h"
60#include "gk20a_scale.h" 60#include "gk20a_scale.h"
61#include "dbg_gpu_gk20a.h" 61#include "dbg_gpu_gk20a.h"
62#include "gk20a_allocator.h"
62#include "hal.h" 63#include "hal.h"
63#include "vgpu/vgpu.h" 64#include "vgpu/vgpu.h"
64 65
@@ -1532,6 +1533,7 @@ static int gk20a_probe(struct platform_device *dev)
1532 gr_gk20a_debugfs_init(gk20a); 1533 gr_gk20a_debugfs_init(gk20a);
1533 gk20a_pmu_debugfs_init(dev); 1534 gk20a_pmu_debugfs_init(dev);
1534 gk20a_cde_debugfs_init(dev); 1535 gk20a_cde_debugfs_init(dev);
1536 gk20a_alloc_debugfs_init(dev);
1535#endif 1537#endif
1536 1538
1537 gk20a_init_gr(gk20a); 1539 gk20a_init_gr(gk20a);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index 675a98a2..56fb22df 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * gk20a allocator 2 * gk20a allocator
3 * 3 *
4 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -16,112 +16,1149 @@
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */ 17 */
18 18
19#include <linux/kernel.h>
20#include <linux/seq_file.h>
21#include <linux/slab.h>
22#include <linux/debugfs.h>
23
24#include "platform_gk20a.h"
19#include "gk20a_allocator.h" 25#include "gk20a_allocator.h"
20#include <linux/vmalloc.h>
21 26
22/* init allocator struct */ 27#include "mm_gk20a.h"
23int gk20a_allocator_init(struct gk20a_allocator *allocator, 28
24 const char *name, u32 start, u32 len) 29static struct dentry *balloc_debugfs_root;
30
31static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
32
33static u32 balloc_tracing_on;
34
35#define balloc_trace_func() \
36 do { \
37 if (balloc_tracing_on) \
38 trace_printk("%s\n", __func__); \
39 } while (0)
40
41#define balloc_trace_func_done() \
42 do { \
43 if (balloc_tracing_on) \
44 trace_printk("%s_done\n", __func__); \
45 } while (0)
46
47
48static void balloc_init_alloc_debug(struct gk20a_allocator *a);
49static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
50 int lock);
51static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
52 u64 addr);
53static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b);
54static void __balloc_do_free_fixed(struct gk20a_allocator *a,
55 struct gk20a_fixed_alloc *falloc);
56
57/*
58 * This function is not present in older kernel's list.h code.
59 */
60#ifndef list_last_entry
61#define list_last_entry(ptr, type, member) \
62 list_entry((ptr)->prev, type, member)
63#endif
64
65/*
66 * GPU buddy allocator for various address spaces.
67 *
68 * Current limitations:
69 * o A fixed allocation could potentially be made that borders PDEs with
70 * different PTE sizes. This would require that fixed buffer to have
71 * different sized PTEs for different parts of the allocation. Probably
72 * best to just require PDE alignment for fixed address allocs.
73 *
74 * o It is currently possible to make an allocator that has a buddy alignment
75 * out of sync with the PDE block size alignment. A simple example is a
76 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
77 * which means each buddy corresponds to more than one actual GPU page. The
78 * best way to fix this is probably just to require PDE block size alignment
79 * for the start of the address space. At the moment all allocators are
80 * easily PDE aligned so this hasn't been a problem.
81 */
82
83/*
84 * Pick a suitable maximum order for this allocator.
85 *
86 * Heuristic: just guessing that the best max order is the largest single
87 * block that will fit in the address space.
88 */
89static void balloc_compute_max_order(struct gk20a_allocator *a)
90{
91 u64 true_max_order = ilog2(a->blks);
92
93 if (a->max_order > true_max_order)
94 a->max_order = true_max_order;
95 if (a->max_order > GPU_BALLOC_MAX_ORDER)
96 a->max_order = GPU_BALLOC_MAX_ORDER;
97}
98
99/*
100 * Since we can only allocate in chunks of a->blk_size we need to trim off
101 * any excess data that is not aligned to a->blk_size.
102 */
103static void balloc_allocator_align(struct gk20a_allocator *a)
104{
105 a->start = ALIGN(a->base, a->blk_size);
106 a->end = (a->base + a->length) & ~(a->blk_size - 1);
107 a->count = a->end - a->start;
108 a->blks = a->count >> a->blk_shift;
109}
110
111/*
112 * Pass NULL for parent if you want a top level buddy.
113 */
114static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a,
115 struct gk20a_buddy *parent,
116 u64 start, u64 order)
117{
118 struct gk20a_buddy *new_buddy;
119
120 new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
121 if (!new_buddy)
122 return NULL;
123
124 memset(new_buddy, 0, sizeof(struct gk20a_buddy));
125
126 new_buddy->parent = parent;
127 new_buddy->start = start;
128 new_buddy->order = order;
129 new_buddy->end = start + (1 << order) * a->blk_size;
130
131 return new_buddy;
132}
133
134static void __balloc_buddy_list_add(struct gk20a_allocator *a,
135 struct gk20a_buddy *b,
136 struct list_head *list)
137{
138 if (buddy_is_in_list(b)) {
139 balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n",
140 b->order, b->start);
141 BUG();
142 }
143
144 /*
145 * Add big PTE blocks to the tail, small to the head for GVA spaces.
146 * This lets the code that checks if there are available blocks check
147 * without cycling through the entire list.
148 */
149 if (a->flags & GPU_BALLOC_GVA_SPACE &&
150 b->pte_size == BALLOC_PTE_SIZE_BIG)
151 list_add_tail(&b->buddy_entry, list);
152 else
153 list_add(&b->buddy_entry, list);
154
155 buddy_set_in_list(b);
156}
157
158static void __balloc_buddy_list_rem(struct gk20a_allocator *a,
159 struct gk20a_buddy *b)
160{
161 if (!buddy_is_in_list(b)) {
162 balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n",
163 b->order, b->start);
164 BUG();
165 }
166
167 list_del_init(&b->buddy_entry);
168 buddy_clr_in_list(b);
169}
170
171/*
172 * Add a buddy to one of the buddy lists and deal with the necessary
173 * book keeping. Adds the buddy to the list specified by the buddy's order.
174 */
175static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b)
176{
177 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
178 a->buddy_list_len[b->order]++;
179}
180
181static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b)
182{
183 __balloc_buddy_list_rem(a, b);
184 a->buddy_list_len[b->order]--;
185}
186
187static u64 balloc_get_order(struct gk20a_allocator *a, u64 len)
188{
189 if (len == 0)
190 return 0;
191
192 len--;
193 len >>= a->blk_shift;
194
195 return fls(len);
196}
197
198static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end)
199{
200 u64 size = (end - start) >> a->blk_shift;
201
202 if (size > 0)
203 return min_t(u64, ilog2(size), a->max_order);
204 else
205 return GPU_BALLOC_MAX_ORDER;
206}
207
208/*
209 * Initialize the buddy lists.
210 */
211static int balloc_init_lists(struct gk20a_allocator *a)
212{
213 int i;
214 u64 bstart, bend, order;
215 struct gk20a_buddy *buddy;
216
217 bstart = a->start;
218 bend = a->end;
219
220 /* First make sure the LLs are valid. */
221 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
222 INIT_LIST_HEAD(balloc_get_order_list(a, i));
223
224 while (bstart < bend) {
225 order = __balloc_max_order_in(a, bstart, bend);
226
227 buddy = balloc_new_buddy(a, NULL, bstart, order);
228 if (!buddy)
229 goto cleanup;
230
231 balloc_blist_add(a, buddy);
232 bstart += balloc_order_to_len(a, order);
233 }
234
235 return 0;
236
237cleanup:
238 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
239 if (!list_empty(balloc_get_order_list(a, i))) {
240 buddy = list_first_entry(balloc_get_order_list(a, i),
241 struct gk20a_buddy, buddy_entry);
242 balloc_blist_rem(a, buddy);
243 kmem_cache_free(buddy_cache, buddy);
244 }
245 }
246
247 return -ENOMEM;
248}
249
250/*
251 * Initialize a buddy allocator. Returns 0 on success. This allocator does
252 * not necessarily manage bytes. It manages distinct ranges of resources. This
253 * allows the allocator to work for things like comp_tags, semaphores, etc.
254 *
255 * @allocator: Ptr to an allocator struct to init.
256 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
257 * get PTE size for GVA spaces.
258 * @name: Name of the allocator. Doesn't have to be static storage.
259 * @base: The base address of the resource pool being managed.
260 * @size: Number of resources in the pool.
261 * @blk_size: Minimum number of resources to allocate at once. For things like
262 * semaphores this is 1. For GVA this might be as much as 64k. This
263 * corresponds to order 0. Must be power of 2.
264 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
265 * will try and pick a reasonable max order.
266 * @flags: Extra flags necessary. See GPU_BALLOC_*.
267 */
268int __gk20a_allocator_init(struct gk20a_allocator *a,
269 struct vm_gk20a *vm, const char *name,
270 u64 base, u64 size, u64 blk_size, u64 max_order,
271 u64 flags)
25{ 272{
26 memset(allocator, 0, sizeof(struct gk20a_allocator)); 273 int err;
274
275 memset(a, 0, sizeof(struct gk20a_allocator));
276 strncpy(a->name, name, 32);
277
278 a->base = base;
279 a->length = size;
280 a->blk_size = blk_size;
281 a->blk_shift = __ffs(blk_size);
282
283 /* blk_size must be greater than 0 and a power of 2. */
284 if (blk_size == 0)
285 return -EINVAL;
286 if (blk_size & (blk_size - 1))
287 return -EINVAL;
288
289 if (max_order > GPU_BALLOC_MAX_ORDER)
290 return -EINVAL;
291
292 /* If this is to manage a GVA space we need a VM. */
293 if (flags & GPU_BALLOC_GVA_SPACE && !vm)
294 return -EINVAL;
295
296 a->vm = vm;
297 if (flags & GPU_BALLOC_GVA_SPACE)
298 a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
27 299
28 strncpy(allocator->name, name, 32); 300 a->flags = flags;
301 a->max_order = max_order;
29 302
30 allocator->base = start; 303 balloc_allocator_align(a);
31 allocator->limit = start + len - 1; 304 balloc_compute_max_order(a);
32 305
33 allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long)); 306 /* Shared buddy kmem_cache for all allocators. */
34 if (!allocator->bitmap) 307 if (!buddy_cache)
308 buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
309 if (!buddy_cache)
35 return -ENOMEM; 310 return -ENOMEM;
36 311
37 allocator_dbg(allocator, "%s : base %d, limit %d", 312 a->alloced_buddies = RB_ROOT;
38 allocator->name, allocator->base, allocator->limit); 313 err = balloc_init_lists(a);
314 if (err)
315 return err;
39 316
40 init_rwsem(&allocator->rw_sema); 317 mutex_init(&a->lock);
41 318
42 allocator->alloc = gk20a_allocator_block_alloc; 319 a->init = 1;
43 allocator->free = gk20a_allocator_block_free; 320
321 balloc_init_alloc_debug(a);
322 balloc_dbg(a, "New allocator: base 0x%llx\n", a->base);
323 balloc_dbg(a, " size 0x%llx\n", a->length);
324 balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size);
325 balloc_dbg(a, " max_order %llu\n", a->max_order);
326 balloc_dbg(a, " flags 0x%llx\n", a->flags);
44 327
45 return 0; 328 return 0;
46} 329}
47 330
48/* destroy allocator, free all remaining blocks if any */ 331int gk20a_allocator_init(struct gk20a_allocator *a, const char *name,
49void gk20a_allocator_destroy(struct gk20a_allocator *allocator) 332 u64 base, u64 size, u64 blk_size)
333{
334 return __gk20a_allocator_init(a, NULL, name,
335 base, size, blk_size, 0, 0);
336}
337
338/*
339 * Clean up and destroy the passed allocator.
340 */
341void gk20a_allocator_destroy(struct gk20a_allocator *a)
50{ 342{
51 down_write(&allocator->rw_sema); 343 struct rb_node *node;
344 struct gk20a_buddy *bud;
345 struct gk20a_fixed_alloc *falloc;
346 int i;
347
348 balloc_lock(a);
349
350 if (!IS_ERR_OR_NULL(a->debugfs_entry))
351 debugfs_remove(a->debugfs_entry);
352
353 /*
354 * Free the fixed allocs first.
355 */
356 while ((node = rb_first(&a->fixed_allocs)) != NULL) {
357 falloc = container_of(node,
358 struct gk20a_fixed_alloc, alloced_entry);
359
360 __balloc_do_free_fixed(a, falloc);
361 rb_erase(node, &a->fixed_allocs);
362 }
363
364 /*
365 * And now free all outstanding allocations.
366 */
367 while ((node = rb_first(&a->alloced_buddies)) != NULL) {
368 bud = container_of(node, struct gk20a_buddy, alloced_entry);
369 balloc_free_buddy(a, bud->start);
370 balloc_blist_add(a, bud);
371 balloc_coalesce(a, bud);
372 }
52 373
53 vfree(allocator->bitmap); 374 /*
375 * Now clean up the unallocated buddies.
376 */
377 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
378 BUG_ON(a->buddy_list_alloced[i] != 0);
379
380 while (!list_empty(balloc_get_order_list(a, i))) {
381 bud = list_first_entry(balloc_get_order_list(a, i),
382 struct gk20a_buddy, buddy_entry);
383 balloc_blist_rem(a, bud);
384 kmem_cache_free(buddy_cache, bud);
385 }
386
387 if (a->buddy_list_len[i] != 0) {
388 pr_info("Excess buddies!!! (%d: %llu)\n",
389 i, a->buddy_list_len[i]);
390 BUG();
391 }
392 if (a->buddy_list_split[i] != 0) {
393 pr_info("Excess split nodes!!! (%d: %llu)\n",
394 i, a->buddy_list_split[i]);
395 BUG();
396 }
397 if (a->buddy_list_alloced[i] != 0) {
398 pr_info("Excess alloced nodes!!! (%d: %llu)\n",
399 i, a->buddy_list_alloced[i]);
400 BUG();
401 }
402 }
54 403
55 memset(allocator, 0, sizeof(struct gk20a_allocator)); 404 a->init = 0;
405
406 balloc_unlock(a);
407
408 /*
409 * We can't unlock an allocator after memsetting it. That wipes the
410 * state of the mutex. Hopefully no one uses the allocator after
411 * destroying it...
412 */
413 memset(a, 0, sizeof(struct gk20a_allocator));
56} 414}
57 415
58/* 416/*
59 * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is 417 * Combine the passed buddy if possible. The pointer in @b may not be valid
60 * returned to caller in *addr. 418 * after this as the buddy may be freed.
61 * 419 *
62 * contiguous allocation, which allocates one block of 420 * @a must be locked.
63 * contiguous address. 421 */
64*/ 422static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b)
65int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
66 u32 *addr, u32 len, u32 align)
67{ 423{
68 unsigned long _addr; 424 struct gk20a_buddy *parent;
69 425
70 allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); 426 if (buddy_is_alloced(b) || buddy_is_split(b))
427 return;
71 428
72 if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ 429 /*
73 *addr + len > allocator->limit || /* check addr range */ 430 * If both our buddy and I are both not allocated and not split then
74 *addr & (align - 1) || /* check addr alignment */ 431 * we can coalesce ourselves.
75 len == 0) /* check len */ 432 */
76 return -EINVAL; 433 if (!b->buddy)
434 return;
435 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
436 return;
437
438 parent = b->parent;
439
440 balloc_blist_rem(a, b);
441 balloc_blist_rem(a, b->buddy);
442
443 buddy_clr_split(parent);
444 a->buddy_list_split[parent->order]--;
445 balloc_blist_add(a, parent);
446
447 /*
448 * Recursively coalesce as far as we can go.
449 */
450 balloc_coalesce(a, parent);
451
452 /* Clean up the remains. */
453 kmem_cache_free(buddy_cache, b->buddy);
454 kmem_cache_free(buddy_cache, b);
455}
456
457/*
458 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
459 *
460 * @a must be locked.
461 */
462static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b,
463 int pte_size)
464{
465 struct gk20a_buddy *left, *right;
466 u64 half;
77 467
78 len = ALIGN(len, align); 468 left = balloc_new_buddy(a, b, b->start, b->order - 1);
79 if (!len) 469 if (!left)
80 return -ENOMEM; 470 return -ENOMEM;
81 471
82 down_write(&allocator->rw_sema); 472 half = (b->end - b->start) / 2;
83 473
84 _addr = bitmap_find_next_zero_area(allocator->bitmap, 474 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
85 allocator->limit - allocator->base + 1, 475 if (!right) {
86 *addr ? (*addr - allocator->base) : 0, 476 kmem_cache_free(buddy_cache, left);
87 len,
88 align - 1);
89 if ((_addr > allocator->limit - allocator->base + 1) ||
90 (*addr && *addr != (_addr + allocator->base))) {
91 up_write(&allocator->rw_sema);
92 return -ENOMEM; 477 return -ENOMEM;
93 } 478 }
94 479
95 bitmap_set(allocator->bitmap, _addr, len); 480 buddy_set_split(b);
96 *addr = allocator->base + _addr; 481 a->buddy_list_split[b->order]++;
97 482
98 up_write(&allocator->rw_sema); 483 b->left = left;
484 b->right = right;
485 left->buddy = right;
486 right->buddy = left;
487 left->parent = b;
488 right->parent = b;
99 489
100 allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); 490 /* PTE considerations. */
491 if (a->flags & GPU_BALLOC_GVA_SPACE &&
492 left->order <= a->pte_blk_order) {
493 left->pte_size = pte_size;
494 right->pte_size = pte_size;
495 }
496
497 balloc_blist_rem(a, b);
498 balloc_blist_add(a, left);
499 balloc_blist_add(a, right);
101 500
102 return 0; 501 return 0;
103} 502}
104 503
105/* free all blocks between start and end */ 504/*
106int gk20a_allocator_block_free(struct gk20a_allocator *allocator, 505 * Place the passed buddy into the RB tree for allocated buddies. Never fails
107 u32 addr, u32 len, u32 align) 506 * unless the passed entry is a duplicate which is a bug.
507 *
508 * @a must be locked.
509 */
510void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b)
108{ 511{
109 allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); 512 struct rb_node **new = &(a->alloced_buddies.rb_node);
513 struct rb_node *parent = NULL;
110 514
111 if (addr + len > allocator->limit || /* check addr range */ 515 while (*new) {
112 addr < allocator->base || 516 struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
113 addr & (align - 1)) /* check addr alignment */ 517 alloced_entry);
114 return -EINVAL;
115 518
116 len = ALIGN(len, align); 519 parent = *new;
117 if (!len) 520 if (b->start < bud->start)
118 return -EINVAL; 521 new = &((*new)->rb_left);
522 else if (b->start > bud->start)
523 new = &((*new)->rb_right);
524 else
525 BUG_ON("Duplicate entries in allocated list!\n");
526 }
527
528 rb_link_node(&b->alloced_entry, parent, new);
529 rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
530
531 buddy_set_alloced(b);
532 a->buddy_list_alloced[b->order]++;
533}
534
535/*
536 * Remove the passed buddy from the allocated buddy RB tree. Returns the
537 * deallocated buddy for further processing.
538 *
539 * @a must be locked.
540 */
541static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
542 u64 addr)
543{
544 struct rb_node *node = a->alloced_buddies.rb_node;
545 struct gk20a_buddy *bud;
546
547 while (node) {
548 bud = container_of(node, struct gk20a_buddy, alloced_entry);
549
550 if (addr < bud->start)
551 node = node->rb_left;
552 else if (addr > bud->start)
553 node = node->rb_right;
554 else
555 break;
556 }
557
558 if (!node)
559 return NULL;
560
561 rb_erase(node, &a->alloced_buddies);
562 buddy_clr_alloced(bud);
563 a->buddy_list_alloced[bud->order]--;
564
565 return bud;
566}
567
568/*
569 * Find a suitable buddy for the given order and PTE type (big or little).
570 */
571static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a,
572 u64 order, int pte_size)
573{
574 struct gk20a_buddy *bud;
575
576 if (list_empty(balloc_get_order_list(a, order)))
577 return NULL;
578
579 if (a->flags & GPU_BALLOC_GVA_SPACE &&
580 pte_size == BALLOC_PTE_SIZE_BIG)
581 bud = list_last_entry(balloc_get_order_list(a, order),
582 struct gk20a_buddy, buddy_entry);
583 else
584 bud = list_first_entry(balloc_get_order_list(a, order),
585 struct gk20a_buddy, buddy_entry);
586
587 if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
588 bud->pte_size != pte_size)
589 return NULL;
590
591 return bud;
592}
593
594/*
595 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
596 * order buddies until we have a suitable buddy to allocate.
597 *
598 * For PDE grouping add an extra check to see if a buddy is suitable: that the
599 * buddy exists in a PDE whose PTE size is reasonable.
600 *
601 * @a must be locked.
602 */
603static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size)
604{
605 u64 split_order;
606 struct gk20a_buddy *bud;
607
608 split_order = order;
609 while (!(bud = __balloc_find_buddy(a, split_order, pte_size)))
610 split_order++;
611
612 while (bud->order != order) {
613 if (balloc_split_buddy(a, bud, pte_size))
614 return 0; /* No mem... */
615 bud = bud->left;
616 }
617
618 balloc_blist_rem(a, bud);
619 balloc_alloc_buddy(a, bud);
119 620
120 down_write(&allocator->rw_sema); 621 return bud->start;
121 bitmap_clear(allocator->bitmap, addr - allocator->base, len); 622}
122 up_write(&allocator->rw_sema); 623
624/*
625 * Allocate memory from the passed allocator.
626 */
627u64 gk20a_balloc(struct gk20a_allocator *a, u64 len)
628{
629 u64 order, addr;
630 int pte_size;
631
632 balloc_trace_func();
633
634 balloc_lock(a);
635
636 order = balloc_get_order(a, len);
637
638 if (order > a->max_order) {
639 balloc_unlock(a);
640 balloc_dbg(a, "Alloc fail\n");
641 balloc_trace_func_done();
642 return 0;
643 }
644
645 /*
646 * For now pass the base address of the allocator's region to
647 * __get_pte_size(). This ensures we get the right page size for
648 * the alloc but we don't have to know what the real address is
649 * going to be quite yet.
650 *
651 * TODO: once userspace supports a unified address space pass 0 for
652 * the base. This will make only 'len' affect the PTE size.
653 */
654 if (a->flags & GPU_BALLOC_GVA_SPACE)
655 pte_size = __get_pte_size(a->vm, a->base, len);
656 else
657 pte_size = BALLOC_PTE_SIZE_ANY;
658
659 addr = __balloc_do_alloc(a, order, pte_size);
660
661 a->bytes_alloced += len;
662 a->bytes_alloced_real += balloc_order_to_len(a, order);
663
664 balloc_unlock(a);
665 balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
666 addr, order, len,
667 pte_size == gmmu_page_size_big ? "big" :
668 pte_size == gmmu_page_size_small ? "small" :
669 "NA/any");
670
671 balloc_trace_func_done();
672 return addr;
673}
674
675/*
676 * See if the passed range is actually available for allocation. If so, then
677 * return 1, otherwise return 0.
678 *
679 * TODO: Right now this uses the unoptimal approach of going through all
680 * outstanding allocations and checking their base/ends. This could be better.
681 */
682static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end)
683{
684 struct rb_node *node;
685 struct gk20a_buddy *bud;
686
687 node = rb_first(&a->alloced_buddies);
688 if (!node)
689 return 1; /* No allocs yet. */
690
691 bud = container_of(node, struct gk20a_buddy, alloced_entry);
692
693 while (bud->start < end) {
694 if ((bud->start > base && bud->start < end) ||
695 (bud->end > base && bud->end < end))
696 return 0;
697
698 node = rb_next(node);
699 if (!node)
700 break;
701 bud = container_of(node, struct gk20a_buddy, alloced_entry);
702 }
703
704 return 1;
705}
706
707static void balloc_alloc_fixed(struct gk20a_allocator *a,
708 struct gk20a_fixed_alloc *f)
709{
710 struct rb_node **new = &(a->fixed_allocs.rb_node);
711 struct rb_node *parent = NULL;
712
713 while (*new) {
714 struct gk20a_fixed_alloc *falloc =
715 container_of(*new, struct gk20a_fixed_alloc,
716 alloced_entry);
717
718 parent = *new;
719 if (f->start < falloc->start)
720 new = &((*new)->rb_left);
721 else if (f->start > falloc->start)
722 new = &((*new)->rb_right);
723 else
724 BUG_ON("Duplicate entries in allocated list!\n");
725 }
726
727 rb_link_node(&f->alloced_entry, parent, new);
728 rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
729}
730
731/*
732 * Remove the passed buddy from the allocated buddy RB tree. Returns the
733 * deallocated buddy for further processing.
734 *
735 * @a must be locked.
736 */
737static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a,
738 u64 addr)
739{
740 struct rb_node *node = a->fixed_allocs.rb_node;
741 struct gk20a_fixed_alloc *falloc;
742
743 while (node) {
744 falloc = container_of(node,
745 struct gk20a_fixed_alloc, alloced_entry);
746
747 if (addr < falloc->start)
748 node = node->rb_left;
749 else if (addr > falloc->start)
750 node = node->rb_right;
751 else
752 break;
753 }
754
755 if (!node)
756 return NULL;
757
758 rb_erase(node, &a->fixed_allocs);
759
760 return falloc;
761}
762
763/*
764 * Find the parent range - doesn't necessarily need the parent to actually exist
765 * as a buddy. Finding an existing parent comes later...
766 */
767static void __balloc_get_parent_range(struct gk20a_allocator *a,
768 u64 base, u64 order,
769 u64 *pbase, u64 *porder)
770{
771 u64 base_mask;
772 u64 shifted_base = balloc_base_shift(a, base);
773
774 order++;
775 base_mask = ~((a->blk_size << order) - 1);
776
777 shifted_base &= base_mask;
778
779 *pbase = balloc_base_unshift(a, shifted_base);
780 *porder = order;
781}
782
783/*
784 * Makes a buddy at the passed address. This will make all parent buddies
785 * necessary for this buddy to exist as well.
786 */
787static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a,
788 u64 base, u64 order)
789{
790 struct gk20a_buddy *bud = NULL;
791 struct list_head *order_list;
792 u64 cur_order = order, cur_base = base;
793
794 /*
795 * Algo:
796 * 1. Keep jumping up a buddy order until we find the real buddy that
797 * this buddy exists in.
798 * 2. Then work our way down through the buddy tree until we hit a dead
799 * end.
800 * 3. Start splitting buddies until we split to the one we need to
801 * make.
802 */
803 while (cur_order <= a->max_order) {
804 int found = 0;
805
806 order_list = balloc_get_order_list(a, cur_order);
807 list_for_each_entry(bud, order_list, buddy_entry) {
808 if (bud->start == cur_base) {
809 found = 1;
810 break;
811 }
812 }
813
814 if (found)
815 break;
816
817 __balloc_get_parent_range(a, cur_base, cur_order,
818 &cur_base, &cur_order);
819 }
820
821 if (cur_order > a->max_order) {
822 balloc_dbg(a, "No buddy for range ???\n");
823 return NULL;
824 }
825
826 /* Split this buddy as necessary until we get the target buddy. */
827 while (bud->start != base || bud->order != order) {
828 if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
829 balloc_coalesce(a, bud);
830 return NULL;
831 }
832
833 if (base < bud->right->start)
834 bud = bud->left;
835 else
836 bud = bud->right;
837
838 }
839
840 return bud;
841}
842
843static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a,
844 struct gk20a_fixed_alloc *falloc,
845 u64 base, u64 len)
846{
847 u64 shifted_base, inc_base;
848 u64 align_order;
849
850 shifted_base = balloc_base_shift(a, base);
851 if (shifted_base == 0)
852 align_order = __fls(len >> a->blk_shift);
853 else
854 align_order = min_t(u64,
855 __ffs(shifted_base >> a->blk_shift),
856 __fls(len >> a->blk_shift));
857
858 if (align_order > a->max_order) {
859 balloc_dbg(a, "Align order too big: %llu > %llu\n",
860 align_order, a->max_order);
861 return 0;
862 }
863
864 /*
865 * Generate a list of buddies that satisfy this allocation.
866 */
867 inc_base = shifted_base;
868 while (inc_base < (shifted_base + len)) {
869 u64 order_len = balloc_order_to_len(a, align_order);
870 u64 remaining;
871 struct gk20a_buddy *bud;
872
873 bud = __balloc_make_fixed_buddy(a,
874 balloc_base_unshift(a, inc_base),
875 align_order);
876 if (!bud) {
877 balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n",
878 balloc_base_unshift(a, inc_base),
879 align_order);
880 goto err_and_cleanup;
881 }
882
883 balloc_blist_rem(a, bud);
884 balloc_alloc_buddy(a, bud);
885 __balloc_buddy_list_add(a, bud, &falloc->buddies);
886
887 /* Book keeping. */
888 inc_base += order_len;
889 remaining = (shifted_base + len) - inc_base;
890 align_order = __ffs(inc_base >> a->blk_shift);
891
892 /* If we don't have much left - trim down align_order. */
893 if (balloc_order_to_len(a, align_order) > remaining)
894 align_order = __balloc_max_order_in(a, inc_base,
895 inc_base + remaining);
896 }
897
898 return base;
123 899
124 allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); 900err_and_cleanup:
901 while (!list_empty(&falloc->buddies)) {
902 struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
903 struct gk20a_buddy,
904 buddy_entry);
905
906 __balloc_buddy_list_rem(a, bud);
907 balloc_free_buddy(a, bud->start);
908 kmem_cache_free(buddy_cache, bud);
909 }
910
911 return 0;
912}
913
914/*
915 * Allocate a fixed address allocation. The address of the allocation is @base
916 * and the length is @len. This is not a typical buddy allocator operation and
917 * as such has a high possibility of failure if the address space is heavily in
918 * use.
919 *
920 * Please do not use this function unless _absolutely_ necessary.
921 */
922u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
923{
924 struct gk20a_fixed_alloc *falloc = NULL;
925 struct gk20a_buddy *bud;
926 u64 ret, real_bytes = 0;
927
928 balloc_trace_func();
929
930 /* If base isn't aligned to an order 0 block, fail. */
931 if (base & (a->blk_size - 1))
932 goto fail;
933
934 if (len == 0)
935 goto fail;
936
937 falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
938 if (!falloc)
939 goto fail;
940
941 INIT_LIST_HEAD(&falloc->buddies);
942 falloc->start = base;
943 falloc->end = base + len;
944
945 balloc_lock(a);
946 if (!balloc_is_range_free(a, base, base + len)) {
947 balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n",
948 base, base + len);
949 goto fail_unlock;
950 }
951
952 ret = __balloc_do_alloc_fixed(a, falloc, base, len);
953 if (!ret) {
954 balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
955 base, base + len);
956 goto fail_unlock;
957 }
958
959 balloc_alloc_fixed(a, falloc);
960
961 list_for_each_entry(bud, &falloc->buddies, buddy_entry)
962 real_bytes += (bud->end - bud->start);
963
964 a->bytes_alloced += len;
965 a->bytes_alloced_real += real_bytes;
966
967 balloc_unlock(a);
968 balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base);
969
970 balloc_trace_func_done();
971 return base;
972
973fail_unlock:
974 balloc_unlock(a);
975fail:
976 kfree(falloc);
977 balloc_trace_func_done();
978 return 0;
979}
980
981static void __balloc_do_free_fixed(struct gk20a_allocator *a,
982 struct gk20a_fixed_alloc *falloc)
983{
984 struct gk20a_buddy *bud;
985
986 while (!list_empty(&falloc->buddies)) {
987 bud = list_first_entry(&falloc->buddies,
988 struct gk20a_buddy,
989 buddy_entry);
990 __balloc_buddy_list_rem(a, bud);
991
992 balloc_free_buddy(a, bud->start);
993 balloc_blist_add(a, bud);
994 a->bytes_freed += balloc_order_to_len(a, bud->order);
995
996 /*
997 * Attempt to defrag the allocation.
998 */
999 balloc_coalesce(a, bud);
1000 }
1001
1002 kfree(falloc);
1003}
1004
1005/*
1006 * Free the passed allocation.
1007 */
1008void gk20a_bfree(struct gk20a_allocator *a, u64 addr)
1009{
1010 struct gk20a_buddy *bud;
1011 struct gk20a_fixed_alloc *falloc;
1012
1013 balloc_trace_func();
1014
1015 if (!addr) {
1016 balloc_trace_func_done();
1017 return;
1018 }
1019
1020 balloc_lock(a);
1021
1022 /*
1023 * First see if this is a fixed alloc. If not fall back to a regular
1024 * buddy.
1025 */
1026 falloc = balloc_free_fixed(a, addr);
1027 if (falloc) {
1028 __balloc_do_free_fixed(a, falloc);
1029 goto done;
1030 }
1031
1032 bud = balloc_free_buddy(a, addr);
1033 if (!bud)
1034 goto done;
1035
1036 balloc_blist_add(a, bud);
1037 a->bytes_freed += balloc_order_to_len(a, bud->order);
1038
1039 /*
1040 * Attempt to defrag the allocation.
1041 */
1042 balloc_coalesce(a, bud);
1043
1044done:
1045 balloc_unlock(a);
1046 balloc_dbg(a, "Free 0x%llx\n", addr);
1047 balloc_trace_func_done();
1048 return;
1049}
1050
1051/*
1052 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1053 * stats are printed to the kernel log. This lets this code be used for
1054 * debugging purposes internal to the allocator.
1055 */
1056static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
1057 int lock)
1058{
1059#define __balloc_pstat(s, fmt, arg...) \
1060 do { \
1061 if (s) \
1062 seq_printf(s, fmt, ##arg); \
1063 else \
1064 balloc_dbg(a, fmt, ##arg); \
1065 } while (0)
1066
1067 int i;
1068 struct rb_node *node;
1069 struct gk20a_fixed_alloc *falloc;
1070
1071 __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n",
1072 a->base, a->length, a->blk_size);
1073 __balloc_pstat(s, "Internal params:\n");
1074 __balloc_pstat(s, " start = %llu\n", a->start);
1075 __balloc_pstat(s, " end = %llu\n", a->end);
1076 __balloc_pstat(s, " count = %llu\n", a->count);
1077 __balloc_pstat(s, " blks = %llu\n", a->blks);
1078 __balloc_pstat(s, " max_order = %llu\n", a->max_order);
1079
1080 __balloc_pstat(s, "Buddy blocks:\n");
1081 __balloc_pstat(s, " Order Free Alloced Split\n");
1082 __balloc_pstat(s, " ----- ---- ------- -----\n");
1083
1084 if (lock)
1085 balloc_lock(a);
1086 for (i = a->max_order; i >= 0; i--) {
1087 if (a->buddy_list_len[i] == 0 &&
1088 a->buddy_list_alloced[i] == 0 &&
1089 a->buddy_list_split[i] == 0)
1090 continue;
1091
1092 __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i,
1093 a->buddy_list_len[i],
1094 a->buddy_list_alloced[i],
1095 a->buddy_list_split[i]);
1096 }
1097
1098 __balloc_pstat(s, "\n");
1099
1100 for (node = rb_first(&a->fixed_allocs), i = 1;
1101 node != NULL;
1102 node = rb_next(node)) {
1103 falloc = container_of(node,
1104 struct gk20a_fixed_alloc, alloced_entry);
1105
1106 __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1107 i, falloc->start, falloc->end);
1108 }
1109
1110 __balloc_pstat(s, "\n");
1111 __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced);
1112 __balloc_pstat(s, "Bytes allocated (real): %llu\n",
1113 a->bytes_alloced_real);
1114 __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed);
1115
1116 if (lock)
1117 balloc_unlock(a);
1118
1119#undef __balloc_pstat
1120}
1121
1122static int __alloc_show(struct seq_file *s, void *unused)
1123{
1124 struct gk20a_allocator *a = s->private;
1125
1126 balloc_print_stats(a, s, 1);
125 1127
126 return 0; 1128 return 0;
127} 1129}
1130
1131static int __alloc_open(struct inode *inode, struct file *file)
1132{
1133 return single_open(file, __alloc_show, inode->i_private);
1134}
1135
1136static const struct file_operations __alloc_fops = {
1137 .open = __alloc_open,
1138 .read = seq_read,
1139 .llseek = seq_lseek,
1140 .release = single_release,
1141};
1142
1143static void balloc_init_alloc_debug(struct gk20a_allocator *a)
1144{
1145 if (!balloc_debugfs_root)
1146 return;
1147
1148 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
1149 balloc_debugfs_root,
1150 a, &__alloc_fops);
1151}
1152
1153void gk20a_alloc_debugfs_init(struct platform_device *pdev)
1154{
1155 struct gk20a_platform *platform = platform_get_drvdata(pdev);
1156 struct dentry *gpu_root = platform->debugfs;
1157
1158 balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root);
1159 if (IS_ERR_OR_NULL(balloc_debugfs_root))
1160 return;
1161
1162 debugfs_create_u32("tracing", 0664, balloc_debugfs_root,
1163 &balloc_tracing_on);
1164}
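
For intuition about how gk20a_balloc() rounds a request to a buddy order (and why bytes_alloced_real can exceed bytes_alloced), the arithmetic of balloc_get_order() and balloc_order_to_len() can be modelled outside the driver. This is a userspace sketch with an assumed 4K block size, not driver code:

/*
 * Model of the buddy order math: a request is rounded up to the smallest
 * power-of-two multiple of blk_size. blk_size and the request lengths are
 * illustrative.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int fls_u64(uint64_t v)		/* stand-in for the kernel's fls() */
{
	unsigned int r = 0;

	while (v) {
		v >>= 1;
		r++;
	}
	return r;
}

static uint64_t get_order(uint64_t len, unsigned int blk_shift)
{
	if (len == 0)
		return 0;
	len--;
	len >>= blk_shift;
	return fls_u64(len);
}

int main(void)
{
	const uint64_t blk_size = 4096;
	const unsigned int blk_shift = 12;
	const uint64_t lens[] = { 4096, 8192, 12288, 1 << 20, (1 << 20) + 1 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		uint64_t order = get_order(lens[i], blk_shift);
		uint64_t real_len = (1ULL << order) * blk_size;

		printf("len %-8llu -> order %llu, real size %llu\n",
		       (unsigned long long)lens[i],
		       (unsigned long long)order,
		       (unsigned long long)real_len);
	}
	return 0;
}

A 12288-byte request, for example, lands in an order-2 buddy (16384 bytes), which is exactly the gap the bytes_alloced vs. bytes_alloced_real counters expose in the debugfs stats.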
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 69a227bd..e86e053b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -17,75 +17,190 @@
17#ifndef GK20A_ALLOCATOR_H 17#ifndef GK20A_ALLOCATOR_H
18#define GK20A_ALLOCATOR_H 18#define GK20A_ALLOCATOR_H
19 19
20#include <linux/list.h>
20#include <linux/rbtree.h> 21#include <linux/rbtree.h>
21#include <linux/rwsem.h> 22#include <linux/debugfs.h>
22#include <linux/slab.h> 23#include <linux/platform_device.h>
23 24
24/* #define ALLOCATOR_DEBUG */ 25/* #define ALLOCATOR_DEBUG */
25 26
26/* main struct */ 27/*
28 * Each buddy is an element in a binary tree.
29 */
30struct gk20a_buddy {
31 struct gk20a_buddy *parent; /* Parent node. */
32 struct gk20a_buddy *buddy; /* This node's buddy. */
33 struct gk20a_buddy *left; /* Lower address sub-node. */
34 struct gk20a_buddy *right; /* Higher address sub-node. */
35
36 struct list_head buddy_entry; /* List entry for various lists. */
37 struct rb_node alloced_entry; /* RB tree of allocations. */
38
39 u64 start; /* Start address of this buddy. */
40 u64 end; /* End address of this buddy. */
41 u64 order; /* Buddy order. */
42
43#define BALLOC_BUDDY_ALLOCED 0x1
44#define BALLOC_BUDDY_SPLIT 0x2
45#define BALLOC_BUDDY_IN_LIST 0x4
46 int flags; /* List of associated flags. */
47
48 /*
49 * Size of the PDE this buddy is using. This allows for grouping like
50 * sized allocations into the same PDE.
51 */
52#define BALLOC_PTE_SIZE_ANY 0x0
53#define BALLOC_PTE_SIZE_SMALL 0x1
54#define BALLOC_PTE_SIZE_BIG 0x2
55 int pte_size;
56};
57
58#define __buddy_flag_ops(flag, flag_up) \
59 static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
60 { \
61 return b->flags & BALLOC_BUDDY_ ## flag_up; \
62 } \
63 static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
64 { \
65 b->flags |= BALLOC_BUDDY_ ## flag_up; \
66 } \
67 static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
68 { \
69 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
70 }
71
72/*
73 * int buddy_is_alloced(struct gk20a_buddy *b);
74 * void buddy_set_alloced(struct gk20a_buddy *b);
75 * void buddy_clr_alloced(struct gk20a_buddy *b);
76 *
77 * int buddy_is_split(struct gk20a_buddy *b);
78 * void buddy_set_split(struct gk20a_buddy *b);
79 * void buddy_clr_split(struct gk20a_buddy *b);
80 *
81 * int buddy_is_in_list(struct gk20a_buddy *b);
82 * void buddy_set_in_list(struct gk20a_buddy *b);
83 * void buddy_clr_in_list(struct gk20a_buddy *b);
84 */
85__buddy_flag_ops(alloced, ALLOCED);
86__buddy_flag_ops(split, SPLIT);
87__buddy_flag_ops(in_list, IN_LIST);
88
89/*
90 * Keeps info for a fixed allocation.
91 */
92struct gk20a_fixed_alloc {
93 struct list_head buddies; /* List of buddies. */
94 struct rb_node alloced_entry; /* RB tree of fixed allocations. */
95
96 u64 start; /* Start of fixed block. */
97 u64 end; /* End address. */
98};
99
100struct vm_gk20a;
101
102/*
103 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
104 * doesn't have to correspond to a byte. In some cases each unit is a more
105 * complex object such as a comp_tag line or the like.
106 *
107 * The max order is computed based on the size of the minimum order and the size
108 * of the address space.
109 *
110 * order_size is the size of an order 0 buddy.
111 */
27struct gk20a_allocator { 112struct gk20a_allocator {
28 113
29 char name[32]; /* name for allocator */ 114 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
30 struct rb_root rb_root; /* rb tree root for blocks */
31 115
32 u32 base; /* min value of this linear space */ 116 char name[32]; /* Name of allocator. */
33 u32 limit; /* max value = limit - 1 */
34 117
35 unsigned long *bitmap; /* bitmap */ 118 u64 base; /* Base address of the space. */
119 u64 length; /* Length of the space. */
120 u64 blk_size; /* Size of order 0 allocation. */
121 u64 blk_shift; /* Shift to divide by blk_size. */
36 122
37 struct gk20a_alloc_block *block_first; /* first block in list */ 123 int init; /* Non-zero if initialized. */
38 struct gk20a_alloc_block *block_recent; /* last visited block */
39 124
40 u32 first_free_addr; /* first free addr, non-contigous 125 /* Internal stuff. */
41 allocation preferred start, 126 u64 start; /* Real start (aligned to blk_size). */
42 in order to pick up small holes */ 127 u64 end; /* Real end, trimmed if needed. */
43 u32 last_free_addr; /* last free addr, contiguous 128 u64 count; /* Count of objects in space. */
44 allocation preferred start */ 129 u64 blks; /* Count of blks in the space. */
45 u32 cached_hole_size; /* max free hole size up to 130 u64 max_order; /* Specific maximum order. */
46 last_free_addr */
47 u32 block_count; /* number of blocks */
48 131
49 struct rw_semaphore rw_sema; /* lock */ 132 struct rb_root alloced_buddies; /* Outstanding allocations. */
50 struct kmem_cache *block_cache; /* slab cache */ 133 struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
51 134
52 /* if enabled, constrain to [base, limit) */ 135 struct mutex lock; /* Protects buddy access. */
53 struct {
54 bool enable;
55 u32 base;
56 u32 limit;
57 } constraint;
58 136
59 int (*alloc)(struct gk20a_allocator *allocator, 137#define GPU_BALLOC_GVA_SPACE 0x1
60 u32 *addr, u32 len, u32 align); 138 u64 flags;
61 int (*free)(struct gk20a_allocator *allocator,
62 u32 addr, u32 len, u32 align);
63 139
64}; 140 /*
141 * Impose an upper bound on the maximum order.
142 */
143#define GPU_BALLOC_MAX_ORDER 31
144#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
65 145
66int gk20a_allocator_init(struct gk20a_allocator *allocator, 146 struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
67 const char *name, u32 base, u32 size); 147 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
68void gk20a_allocator_destroy(struct gk20a_allocator *allocator); 148 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
149 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
69 150
70int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, 151 /*
71 u32 *addr, u32 len, u32 align); 152 * This is for when the allocator is managing a GVA space (the
153 * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires
154 * that we group like sized allocations into PDE blocks.
155 */
156 u64 pte_blk_order;
72 157
73int gk20a_allocator_block_free(struct gk20a_allocator *allocator, 158 struct dentry *debugfs_entry;
74 u32 addr, u32 len, u32 align);
75 159
76#if defined(ALLOCATOR_DEBUG) 160 u64 bytes_alloced;
161 u64 bytes_alloced_real;
162 u64 bytes_freed;
163};
77 164
78#define allocator_dbg(alloctor, format, arg...) \ 165#define balloc_lock(a) mutex_lock(&(a)->lock)
79do { \ 166#define balloc_unlock(a) mutex_unlock(&(a)->lock)
80 if (1) \
81 pr_debug("gk20a_allocator (%s) %s: " format "\n",\
82 alloctor->name, __func__, ##arg);\
83} while (0)
84 167
85#else /* ALLOCATOR_DEBUG */ 168#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)])
169#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size)
170#define balloc_base_shift(a, base) ((base) - (a)->start)
171#define balloc_base_unshift(a, base) ((base) + (a)->start)
86 172
87#define allocator_dbg(format, arg...) 173int gk20a_allocator_init(struct gk20a_allocator *allocator,
174 const char *name, u64 base, u64 size, u64 order0);
175int __gk20a_allocator_init(struct gk20a_allocator *allocator,
176 struct vm_gk20a *vm, const char *name,
177 u64 base, u64 size, u64 order0,
178 u64 max_order, u64 flags);
179void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
88 180
89#endif /* ALLOCATOR_DEBUG */ 181/*
182 * Normal alloc/free operations for the buddy allocator.
183 */
184u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len);
185void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr);
186
187/*
188 * Special interface to allocate a memory regions with a specific starting
189 * address. Yikes.
190 */
191u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
192
193/*
194 * Debugfs init.
195 */
196void gk20a_alloc_debugfs_init(struct platform_device *pdev);
197
198#if defined(ALLOCATOR_DEBUG)
199#define balloc_dbg(alloctor, format, arg...) \
200 pr_info("%-25s %25s() " format, \
201 alloctor->name, __func__, ##arg)
202#else
203#define balloc_dbg(allocator, format, arg...)
204#endif
90 205
91#endif /* GK20A_ALLOCATOR_H */ 206#endif /* GK20A_ALLOCATOR_H */
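
Taken together, the header boils down to a simple calling convention: initialize once, allocate by length, free by address, destroy when done. A minimal kernel-side sketch of that flow (it assumes compilation inside the nvgpu tree so the header above resolves; the name, base, size and max_order values mirror the comptag setup and are purely illustrative):

#include "gk20a_allocator.h"

/* Hypothetical example: manage 4096 abstract resource IDs starting at 1. */
static int example_allocator_usage(void)
{
	struct gk20a_allocator alloc;
	u64 id;
	int err;

	/* blk_size = 1: an order-0 buddy is a single resource. */
	err = __gk20a_allocator_init(&alloc, NULL, "example",
				     1, 4096, 1, 10, 0);
	if (err)
		return err;

	/* gk20a_balloc() returns 0 on failure, a start address/ID otherwise. */
	id = gk20a_balloc(&alloc, 16);
	if (!id) {
		gk20a_allocator_destroy(&alloc);
		return -ENOMEM;
	}

	/* The length is tracked internally; freeing needs only the address. */
	gk20a_bfree(&alloc, id);

	gk20a_allocator_destroy(&alloc);
	return 0;
}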
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 02bea0a1..7cb386f0 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -89,9 +89,8 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
89 if (err) 89 if (err)
90 return err; 90 return err;
91 91
92 gk20a_allocator_init(&gr->comp_tags, "comptag", 92 __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
93 1, /* start */ 93 1, max_comptag_lines - 1, 1, 10, 0);
94 max_comptag_lines - 1); /* length*/
95 94
96 gr->comptags_per_cacheline = comptags_per_cacheline; 95 gr->comptags_per_cacheline = comptags_per_cacheline;
97 gr->slices_per_ltc = slices_per_fbp / g->ltc_count; 96 gr->slices_per_ltc = slices_per_fbp / g->ltc_count;
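
The comptag conversion above is the simplest caller of the new interface; restated with the parameters labelled (the same call as in the hunk, comments added for reference only):

__gk20a_allocator_init(&gr->comp_tags,		/* allocator to set up                  */
		       NULL,			/* no VM: not a GVA space               */
		       "comptag",		/* name used in logs/debugfs            */
		       1,			/* base: line numbering starts at 1, as before */
		       max_comptag_lines - 1,	/* size: number of ctag lines           */
		       1,			/* blk_size: one line per order-0 buddy */
		       10,			/* max_order: up to 2^10 lines per buddy */
		       0);			/* flags: none                          */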
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 735c262a..a38db709 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv)
132 132
133 if (priv->comptags.lines) { 133 if (priv->comptags.lines) {
134 BUG_ON(!priv->comptag_allocator); 134 BUG_ON(!priv->comptag_allocator);
135 priv->comptag_allocator->free(priv->comptag_allocator, 135 gk20a_bfree(priv->comptag_allocator,
136 priv->comptags.offset, 136 priv->comptags.real_offset);
137 priv->comptags.allocated_lines,
138 1);
139 } 137 }
140 138
141 /* Free buffer states */ 139 /* Free buffer states */
@@ -226,10 +224,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
226 u32 *ctag_map_win_ctagline) 224 u32 *ctag_map_win_ctagline)
227{ 225{
228 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); 226 struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
229 u32 offset = 0;
230 int err;
231 u32 ctaglines_to_allocate; 227 u32 ctaglines_to_allocate;
232 u32 ctagline_align; 228 u32 ctagline_align = 1;
229 u32 offset;
233 const u32 aggregate_cacheline_sz = 230 const u32 aggregate_cacheline_sz =
234 g->gr.cacheline_size * g->gr.slices_per_ltc * 231 g->gr.cacheline_size * g->gr.slices_per_ltc *
235 g->ltc_count; 232 g->ltc_count;
@@ -243,7 +240,6 @@ static int gk20a_alloc_comptags(struct gk20a *g,
243 240
244 if (!user_mappable) { 241 if (!user_mappable) {
245 ctaglines_to_allocate = lines; 242 ctaglines_to_allocate = lines;
246 ctagline_align = 1;
247 } else { 243 } else {
248 /* Unfortunately, we cannot use allocation alignment 244 /* Unfortunately, we cannot use allocation alignment
249 * here, since compbits per cacheline is not always a 245 * here, since compbits per cacheline is not always a
@@ -275,82 +271,26 @@ static int gk20a_alloc_comptags(struct gk20a *g,
275 271
276 if (ctaglines_to_allocate < lines) 272 if (ctaglines_to_allocate < lines)
277 return -EINVAL; /* integer overflow */ 273 return -EINVAL; /* integer overflow */
274 pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
278 } 275 }
279 276
280 /* store the allocator so we can use it when we free the ctags */ 277 /* store the allocator so we can use it when we free the ctags */
281 priv->comptag_allocator = allocator; 278 priv->comptag_allocator = allocator;
282 err = allocator->alloc(allocator, &offset, 279 offset = gk20a_balloc(allocator, ctaglines_to_allocate);
283 ctaglines_to_allocate, 1); 280 if (!offset)
284 if (!err) { 281 return -ENOMEM;
285 const u32 alignment_lines =
286 DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
287 offset;
288
289 /* prune the preceding ctaglines that were allocated
290 for alignment */
291 if (alignment_lines) {
292 /* free alignment lines */
293 int tmp=
294 allocator->free(allocator, offset,
295 alignment_lines,
296 1);
297 WARN_ON(tmp);
298
299 offset += alignment_lines;
300 ctaglines_to_allocate -= alignment_lines;
301 }
302 282
303 /* check if we can prune the trailing, too */ 283 priv->comptags.lines = lines;
304 if (user_mappable) 284 priv->comptags.real_offset = offset;
305 {
306 u32 needed_cachelines =
307 DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
308
309 u32 first_unneeded_cacheline =
310 DIV_ROUND_UP(round_up(needed_cachelines *
311 aggregate_cacheline_sz,
312 small_pgsz),
313 aggregate_cacheline_sz);
314 u32 needed_ctaglines =
315 first_unneeded_cacheline *
316 g->gr.comptags_per_cacheline;
317
318 u64 win_size;
319
320 if (needed_ctaglines < ctaglines_to_allocate) {
321 /* free alignment lines */
322 int tmp=
323 allocator->free(
324 allocator,
325 offset + needed_ctaglines,
326 (ctaglines_to_allocate -
327 needed_ctaglines),
328 1);
329 WARN_ON(tmp);
330
331 ctaglines_to_allocate = needed_ctaglines;
332 }
333 285
334 *ctag_map_win_ctagline = offset; 286 if (user_mappable)
335 win_size = 287 offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
336 DIV_ROUND_UP(lines,
337 g->gr.comptags_per_cacheline) *
338 aggregate_cacheline_sz;
339 288
340 *ctag_map_win_size = round_up(win_size, small_pgsz); 289 priv->comptags.offset = offset;
341 }
342 290
343 priv->comptags.offset = offset; 291 return 0;
344 priv->comptags.lines = lines;
345 priv->comptags.allocated_lines = ctaglines_to_allocate;
346 priv->comptags.user_mappable = user_mappable;
347 }
348 return err;
349} 292}
350 293
351
352
353
354static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) 294static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
355{ 295{
356 gk20a_dbg_fn(""); 296 gk20a_dbg_fn("");
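
The comptag allocation path above keeps two offsets: the value returned by gk20a_balloc() is stored as comptags.real_offset (it is what gk20a_bfree() must be given later, see the gk20a_mm_delete_priv() hunk), while the offset handed out for user mapping is rounded up to ctagline_align. A userspace sketch of just that rounding (DIV_ROUND_UP is redefined locally; the values are made up):

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))	/* as in the kernel */

int main(void)
{
	uint32_t real_offset = 1000;	/* what gk20a_balloc() returned        */
	uint32_t ctagline_align = 128;	/* derived from cacheline/page sizes   */
	uint32_t offset;

	/* The aligned offset is what user mappings see... */
	offset = DIV_ROUND_UP(real_offset, ctagline_align) * ctagline_align;

	/* ...but real_offset is what must be passed back to gk20a_bfree(). */
	printf("real_offset=%u aligned offset=%u\n", real_offset, offset);
	return 0;
}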
@@ -901,14 +841,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
901} 841}
902 842
903u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, 843u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
904 u64 size, 844 u64 size,
905 enum gmmu_pgsz_gk20a gmmu_pgsz_idx) 845 enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
906 846
907{ 847{
908 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; 848 struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
909 int err;
910 u64 offset; 849 u64 offset;
911 u32 start_page_nr = 0, num_pages;
912 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; 850 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
913 851
914 if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { 852 if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -924,28 +862,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
924 862
925 } 863 }
926 864
927 /* be certain we round up to gmmu_page_size if needed */ 865 /* Be certain we round up to gmmu_page_size if needed */
928 /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
929 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); 866 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
930
931 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, 867 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
932 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); 868 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
933 869
934 /* The vma allocator represents page accounting. */ 870 offset = gk20a_balloc(vma, size);
935 num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); 871 if (!offset) {
936
937 err = vma->alloc(vma, &start_page_nr, num_pages, 1);
938
939 if (err) {
940 gk20a_err(dev_from_vm(vm), 872 gk20a_err(dev_from_vm(vm),
941 "%s oom: sz=0x%llx", vma->name, size); 873 "%s oom: sz=0x%llx", vma->name, size);
942 return 0; 874 return 0;
943 } 875 }
944 876
945 offset = (u64)start_page_nr <<
946 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
947 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); 877 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
948
949 return offset; 878 return offset;
950} 879}
951 880
@@ -954,25 +883,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
954 enum gmmu_pgsz_gk20a pgsz_idx) 883 enum gmmu_pgsz_gk20a pgsz_idx)
955{ 884{
956 struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; 885 struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
957 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
958 u32 page_shift = ilog2(page_size);
959 u32 start_page_nr, num_pages;
960 int err;
961 886
962 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", 887 gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
963 vma->name, offset, size); 888 vma->name, offset, size);
889 gk20a_bfree(vma, offset);
964 890
965 start_page_nr = (u32)(offset >> page_shift); 891 return 0;
966 num_pages = (u32)((size + page_size - 1) >> page_shift);
967
968 err = vma->free(vma, start_page_nr, num_pages, 1);
969 if (err) {
970 gk20a_err(dev_from_vm(vm),
971 "not found: offset=0x%llx, sz=0x%llx",
972 offset, size);
973 }
974
975 return err;
976} 892}
977 893
978static int insert_mapped_buffer(struct rb_root *root, 894static int insert_mapped_buffer(struct rb_root *root,
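
With the buddy allocator managing the VMA in bytes, gk20a_vm_alloc_va() no longer converts to page counts: it rounds the size up to the GMMU page size with mask arithmetic (again sidestepping 64-bit division) and passes the byte length straight to gk20a_balloc(), and gk20a_vm_free_va() needs only the offset. A standalone check of the round-up expression, assuming a 128K big-page size:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t gmmu_page_size = 128 << 10;	/* assumed big page size */
	const uint64_t sizes[] = { 1, 128 << 10, (128 << 10) + 1, 1 << 20 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		/* Same expression as in gk20a_vm_alloc_va(). */
		uint64_t rounded = (sizes[i] + (gmmu_page_size - 1)) &
				   ~(gmmu_page_size - 1);

		printf("0x%llx -> 0x%llx\n",
		       (unsigned long long)sizes[i],
		       (unsigned long long)rounded);
	}
	return 0;
}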
@@ -1169,7 +1085,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
1169 1085
1170 if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { 1086 if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
1171 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", 1087 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1172 map_offset); 1088 map_offset);
1173 return -EINVAL; 1089 return -EINVAL;
1174 } 1090 }
1175 1091
@@ -2613,7 +2529,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2613 char *name) 2529 char *name)
2614{ 2530{
2615 int err, i; 2531 int err, i;
2616 u32 num_small_pages, num_large_pages, low_hole_pages;
2617 char alloc_name[32]; 2532 char alloc_name[32];
2618 u64 small_vma_size, large_vma_size; 2533 u64 small_vma_size, large_vma_size;
2619 u32 pde_lo, pde_hi; 2534 u32 pde_lo, pde_hi;
@@ -2674,34 +2589,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
2674 large_vma_size = vm->va_limit - small_vma_size; 2589 large_vma_size = vm->va_limit - small_vma_size;
2675 } 2590 }
2676 2591
2677 num_small_pages = (u32)(small_vma_size >>
2678 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
2679
2680 /* num_pages above is without regard to the low-side hole. */
2681 low_hole_pages = (vm->va_start >>
2682 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
2683
2684 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, 2592 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
2685 vm->gmmu_page_sizes[gmmu_page_size_small]>>10); 2593 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
2686 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], 2594 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
2687 alloc_name, 2595 vm, alloc_name,
2688 low_hole_pages, /*start*/ 2596 vm->va_start,
2689 num_small_pages - low_hole_pages);/* length*/ 2597 small_vma_size - vm->va_start,
2598 SZ_4K,
2599 GPU_BALLOC_MAX_ORDER,
2600 GPU_BALLOC_GVA_SPACE);
2690 if (err) 2601 if (err)
2691 goto clean_up_ptes; 2602 goto clean_up_ptes;
2692 2603
2693 if (big_pages) { 2604 if (big_pages) {
2694 u32 start = (u32)(small_vma_size >>
2695 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
2696 num_large_pages = (u32)(large_vma_size >>
2697 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
2698
2699 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", 2605 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
2700 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); 2606 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
2701 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 2607 /*
2702 alloc_name, 2608 * Big page VMA starts at the end of the small page VMA.
2703 start, /* start */ 2609 */
2704 num_large_pages); /* length */ 2610 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
2611 vm, alloc_name,
2612 small_vma_size,
2613 large_vma_size,
2614 big_page_size,
2615 GPU_BALLOC_MAX_ORDER,
2616 GPU_BALLOC_GVA_SPACE);
2705 if (err) 2617 if (err)
2706 goto clean_up_small_allocator; 2618 goto clean_up_small_allocator;
2707 } 2619 }
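For reference, gk20a_init_vm() above now hands each allocator a byte range rather than a page count: the small-page VMA covers [va_start, small_vma_size) with 4 KB blocks, and the big-page VMA picks up at small_vma_size for large_vma_size bytes with big_page_size blocks, both created with GPU_BALLOC_MAX_ORDER and GPU_BALLOC_GVA_SPACE. A tiny sketch of that arithmetic with made-up example sizes (the real numbers come from the vm structure and the chip's big page size):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example values only; they stand in for vm->va_start, vm->va_limit
	 * and the driver's chosen small/large split. */
	uint64_t va_start       = 1ULL << 20;   /* low hole below the small VMA */
	uint64_t va_limit       = 128ULL << 30; /* end of the GPU VA space */
	uint64_t small_vma_size = 16ULL << 30;  /* split point */
	uint64_t large_vma_size = va_limit - small_vma_size;

	/* Small-page VMA: base and length handed to the allocator in bytes. */
	printf("small VMA: base=0x%llx length=0x%llx (4 KB blocks)\n",
	       (unsigned long long)va_start,
	       (unsigned long long)(small_vma_size - va_start));

	/* Big-page VMA starts exactly where the small-page VMA ends. */
	printf("big VMA:   base=0x%llx length=0x%llx (big-page blocks)\n",
	       (unsigned long long)small_vma_size,
	       (unsigned long long)large_vma_size);
	return 0;
}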
@@ -2782,9 +2694,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2782int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, 2694int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2783 struct nvgpu_as_alloc_space_args *args) 2695 struct nvgpu_as_alloc_space_args *args)
2784 2696
2785{ int err = -ENOMEM; 2697{
2698 int err = -ENOMEM;
2786 int pgsz_idx = gmmu_page_size_small; 2699 int pgsz_idx = gmmu_page_size_small;
2787 u32 start_page_nr;
2788 struct gk20a_allocator *vma; 2700 struct gk20a_allocator *vma;
2789 struct vm_gk20a *vm = as_share->vm; 2701 struct vm_gk20a *vm = as_share->vm;
2790 struct gk20a *g = vm->mm->g; 2702 struct gk20a *g = vm->mm->g;
@@ -2815,21 +2727,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2815 goto clean_up; 2727 goto clean_up;
2816 } 2728 }
2817 2729
2818 start_page_nr = 0; 2730 vma = &vm->vma[pgsz_idx];
2819 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) 2731 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2820 start_page_nr = (u32)(args->o_a.offset >> 2732 vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
2821 ilog2(vm->gmmu_page_sizes[pgsz_idx])); 2733 (u64)args->pages *
2734 (u64)args->page_size);
2735 else
2736		vaddr_start = gk20a_balloc(vma, (u64)args->pages * (u64)args->page_size);
2822 2737
2823 vma = &vm->vma[pgsz_idx]; 2738 if (!vaddr_start) {
2824 err = vma->alloc(vma, &start_page_nr, args->pages, 1);
2825 if (err) {
2826 kfree(va_node); 2739 kfree(va_node);
2827 goto clean_up; 2740 goto clean_up;
2828 } 2741 }
2829 2742
2830 vaddr_start = (u64)start_page_nr <<
2831 ilog2(vm->gmmu_page_sizes[pgsz_idx]);
2832
2833 va_node->vaddr_start = vaddr_start; 2743 va_node->vaddr_start = vaddr_start;
2834 va_node->size = (u64)args->page_size * (u64)args->pages; 2744 va_node->size = (u64)args->page_size * (u64)args->pages;
2835 va_node->pgsz_idx = pgsz_idx; 2745 va_node->pgsz_idx = pgsz_idx;
@@ -2853,7 +2763,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2853 true); 2763 true);
2854 if (!map_offset) { 2764 if (!map_offset) {
2855 mutex_unlock(&vm->update_gmmu_lock); 2765 mutex_unlock(&vm->update_gmmu_lock);
2856 vma->free(vma, start_page_nr, args->pages, 1); 2766 gk20a_bfree(vma, vaddr_start);
2857 kfree(va_node); 2767 kfree(va_node);
2858 goto clean_up; 2768 goto clean_up;
2859 } 2769 }
@@ -2865,6 +2775,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2865 mutex_unlock(&vm->update_gmmu_lock); 2775 mutex_unlock(&vm->update_gmmu_lock);
2866 2776
2867 args->o_a.offset = vaddr_start; 2777 args->o_a.offset = vaddr_start;
2778 err = 0;
2868 2779
2869clean_up: 2780clean_up:
2870 return err; 2781 return err;
@@ -2875,7 +2786,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2875{ 2786{
2876 int err = -ENOMEM; 2787 int err = -ENOMEM;
2877 int pgsz_idx; 2788 int pgsz_idx;
2878 u32 start_page_nr;
2879 struct gk20a_allocator *vma; 2789 struct gk20a_allocator *vma;
2880 struct vm_gk20a *vm = as_share->vm; 2790 struct vm_gk20a *vm = as_share->vm;
2881 struct vm_reserved_va_node *va_node; 2791 struct vm_reserved_va_node *va_node;
@@ -2888,14 +2798,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2888 pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? 2798 pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
2889 gmmu_page_size_big : gmmu_page_size_small; 2799 gmmu_page_size_big : gmmu_page_size_small;
2890 2800
2891 start_page_nr = (u32)(args->offset >>
2892 ilog2(vm->gmmu_page_sizes[pgsz_idx]));
2893
2894 vma = &vm->vma[pgsz_idx]; 2801 vma = &vm->vma[pgsz_idx];
2895 err = vma->free(vma, start_page_nr, args->pages, 1); 2802 gk20a_bfree(vma, args->offset);
2896
2897 if (err)
2898 goto clean_up;
2899 2803
2900 mutex_lock(&vm->update_gmmu_lock); 2804 mutex_lock(&vm->update_gmmu_lock);
2901 va_node = addr_to_reservation(vm, args->offset); 2805 va_node = addr_to_reservation(vm, args->offset);
@@ -2925,8 +2829,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2925 kfree(va_node); 2829 kfree(va_node);
2926 } 2830 }
2927 mutex_unlock(&vm->update_gmmu_lock); 2831 mutex_unlock(&vm->update_gmmu_lock);
2832 err = 0;
2928 2833
2929clean_up:
2930 return err; 2834 return err;
2931} 2835}
2932 2836
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c1f8a4f0..82003cd0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -131,6 +131,7 @@ enum gmmu_pgsz_gk20a {
131}; 131};
132 132
133struct gk20a_comptags { 133struct gk20a_comptags {
134 u32 real_offset;
134 u32 offset; 135 u32 offset;
135 u32 lines; 136 u32 lines;
136 u32 allocated_lines; 137 u32 allocated_lines;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 2456c784..11322293 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2816,7 +2816,6 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2816 struct pmu_payload payload; 2816 struct pmu_payload payload;
2817 u32 seq; 2817 u32 seq;
2818 u32 data; 2818 u32 data;
2819 int err = 0;
2820 2819
2821 gk20a_dbg_fn(""); 2820 gk20a_dbg_fn("");
2822 2821
@@ -2867,12 +2866,11 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
2867 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); 2866 gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
2868 2867
2869 if (!pmu->sample_buffer) 2868 if (!pmu->sample_buffer)
2870 err = pmu->dmem.alloc(&pmu->dmem, 2869 pmu->sample_buffer = gk20a_balloc(&pmu->dmem,
2871 &pmu->sample_buffer, 2 * sizeof(u16), 2870 2 * sizeof(u16));
2872 PMU_DMEM_ALLOC_ALIGNMENT); 2871 if (!pmu->sample_buffer) {
2873 if (err) {
2874 gk20a_err(dev_from_gk20a(g), 2872 gk20a_err(dev_from_gk20a(g),
2875 "failed to allocate perfmon sample buffer"); 2873 "failed to allocate perfmon sample buffer");
2876 return -ENOMEM; 2874 return -ENOMEM;
2877 } 2875 }
2878 2876
@@ -2970,15 +2968,17 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
2970 for (i = 0; i < PMU_QUEUE_COUNT; i++) 2968 for (i = 0; i < PMU_QUEUE_COUNT; i++)
2971 pmu_queue_init(pmu, i, init); 2969 pmu_queue_init(pmu, i, init);
2972 2970
2973 if (!pmu->dmem.alloc) { 2971 if (!pmu->dmem.init) {
2974 /*Align start and end addresses*/ 2972 /* Align start and end addresses */
2975 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), 2973 u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
2976 PMU_DMEM_ALLOC_ALIGNMENT); 2974 PMU_DMEM_ALLOC_ALIGNMENT);
2977 u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) + 2975 u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) +
2978 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & 2976 pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
2979 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); 2977 ~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
2980 u32 size = end - start; 2978 u32 size = end - start;
2981 gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", start, size); 2979 __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem",
2980 start, size,
2981 PMU_DMEM_ALLOC_ALIGNMENT, 4, 0);
2982 } 2982 }
2983 2983
2984 pmu->pmu_ready = true; 2984 pmu->pmu_ready = true;
@@ -3115,20 +3115,14 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
3115 seq->callback = NULL; 3115 seq->callback = NULL;
3116 if (pv->pmu_allocation_get_dmem_size(pmu, 3116 if (pv->pmu_allocation_get_dmem_size(pmu,
3117 pv->get_pmu_seq_in_a_ptr(seq)) != 0) 3117 pv->get_pmu_seq_in_a_ptr(seq)) != 0)
3118 pmu->dmem.free(&pmu->dmem, 3118 gk20a_bfree(&pmu->dmem,
3119 pv->pmu_allocation_get_dmem_offset(pmu, 3119 pv->pmu_allocation_get_dmem_offset(pmu,
3120 pv->get_pmu_seq_in_a_ptr(seq)), 3120 pv->get_pmu_seq_in_a_ptr(seq)));
3121 pv->pmu_allocation_get_dmem_size(pmu,
3122 pv->get_pmu_seq_in_a_ptr(seq)),
3123 PMU_DMEM_ALLOC_ALIGNMENT);
3124 if (pv->pmu_allocation_get_dmem_size(pmu, 3121 if (pv->pmu_allocation_get_dmem_size(pmu,
3125 pv->get_pmu_seq_out_a_ptr(seq)) != 0) 3122 pv->get_pmu_seq_out_a_ptr(seq)) != 0)
3126 pmu->dmem.free(&pmu->dmem, 3123 gk20a_bfree(&pmu->dmem,
3127 pv->pmu_allocation_get_dmem_offset(pmu, 3124 pv->pmu_allocation_get_dmem_offset(pmu,
3128 pv->get_pmu_seq_out_a_ptr(seq)), 3125 pv->get_pmu_seq_out_a_ptr(seq)));
3129 pv->pmu_allocation_get_dmem_size(pmu,
3130 pv->get_pmu_seq_out_a_ptr(seq)),
3131 PMU_DMEM_ALLOC_ALIGNMENT);
3132 3126
3133 if (seq->callback) 3127 if (seq->callback)
3134 seq->callback(g, msg, seq->cb_params, seq->desc, ret); 3128 seq->callback(g, msg, seq->cb_params, seq->desc, ret);
@@ -3769,11 +3763,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3769 pv->pmu_allocation_set_dmem_size(pmu, in, 3763 pv->pmu_allocation_set_dmem_size(pmu, in,
3770 (u16)max(payload->in.size, payload->out.size)); 3764 (u16)max(payload->in.size, payload->out.size));
3771 3765
3772 err = pmu->dmem.alloc(&pmu->dmem, 3766 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
3773 pv->pmu_allocation_get_dmem_offset_addr(pmu, in), 3767 gk20a_balloc(&pmu->dmem,
3774 pv->pmu_allocation_get_dmem_size(pmu, in), 3768 pv->pmu_allocation_get_dmem_size(pmu, in));
3775 PMU_DMEM_ALLOC_ALIGNMENT); 3769 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
3776 if (err)
3777 goto clean_up; 3770 goto clean_up;
3778 3771
3779 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, 3772 pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
@@ -3794,11 +3787,12 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3794 (u16)payload->out.size); 3787 (u16)payload->out.size);
3795 3788
3796 if (payload->out.buf != payload->in.buf) { 3789 if (payload->out.buf != payload->in.buf) {
3797 err = pmu->dmem.alloc(&pmu->dmem, 3790
3798 pv->pmu_allocation_get_dmem_offset_addr(pmu, out), 3791 *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
3799 pv->pmu_allocation_get_dmem_size(pmu, out), 3792 gk20a_balloc(&pmu->dmem,
3800 PMU_DMEM_ALLOC_ALIGNMENT); 3793 pv->pmu_allocation_get_dmem_size(pmu, out));
3801 if (err) 3794 if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
3795 out)))
3802 goto clean_up; 3796 goto clean_up;
3803 } else { 3797 } else {
3804 BUG_ON(in == NULL); 3798 BUG_ON(in == NULL);
@@ -3826,15 +3820,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
3826clean_up: 3820clean_up:
3827 gk20a_dbg_fn("fail"); 3821 gk20a_dbg_fn("fail");
3828 if (in) 3822 if (in)
3829 pmu->dmem.free(&pmu->dmem, 3823 gk20a_bfree(&pmu->dmem,
3830 pv->pmu_allocation_get_dmem_offset(pmu, in), 3824 pv->pmu_allocation_get_dmem_offset(pmu, in));
3831 pv->pmu_allocation_get_dmem_size(pmu, in),
3832 PMU_DMEM_ALLOC_ALIGNMENT);
3833 if (out) 3825 if (out)
3834 pmu->dmem.free(&pmu->dmem, 3826 gk20a_bfree(&pmu->dmem,
3835 pv->pmu_allocation_get_dmem_offset(pmu, out), 3827 pv->pmu_allocation_get_dmem_offset(pmu, out));
3836 pv->pmu_allocation_get_dmem_size(pmu, out),
3837 PMU_DMEM_ALLOC_ALIGNMENT);
3838 3828
3839 pmu_seq_release(pmu, seq); 3829 pmu_seq_release(pmu, seq);
3840 return err; 3830 return err;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 73530b22..f29c810e 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A PMU (aka. gPMU outside gk20a context) 4 * GK20A PMU (aka. gPMU outside gk20a context)
5 * 5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -466,7 +466,7 @@ struct pmu_ucode_desc {
466#define PMU_UNIT_ID_IS_VALID(id) \ 466#define PMU_UNIT_ID_IS_VALID(id) \
467 (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) 467 (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
468 468
469#define PMU_DMEM_ALLOC_ALIGNMENT (32) 469#define PMU_DMEM_ALLOC_ALIGNMENT (4)
470#define PMU_DMEM_ALIGNMENT (4) 470#define PMU_DMEM_ALIGNMENT (4)
471 471
472#define PMU_CMD_FLAGS_PMU_MASK (0xF0) 472#define PMU_CMD_FLAGS_PMU_MASK (0xF0)
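The alignment constant drops from 32 to 4 because alignment is no longer passed on every DMEM allocation; in this diff its remaining uses are the start/end alignment of the PMU-managed region and the block size handed to __gk20a_allocator_init() in pmu_process_init_msg(). A quick model of that start/end arithmetic, using made-up offsets in place of the values reported by the PMU init message:

#include <stdint.h>
#include <stdio.h>

#define PMU_DMEM_ALLOC_ALIGNMENT 4u
#define ALIGN_UP(x, a)   (((x) + ((a) - 1)) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
	uint32_t mg_off  = 0x00a6;   /* example managed-region offset */
	uint32_t mg_size = 0x0f02;   /* example managed-region size */

	/* Start is rounded up, end rounded down, as in pmu_process_init_msg(). */
	uint32_t start = ALIGN_UP(mg_off, PMU_DMEM_ALLOC_ALIGNMENT);
	uint32_t end   = ALIGN_DOWN(mg_off + mg_size, PMU_DMEM_ALLOC_ALIGNMENT);

	printf("DMEM allocator region: start=0x%x size=0x%x\n", start, end - start);
	return 0;
}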
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 04f61c58..053550f6 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Semaphores 4 * GK20A Semaphores
5 * 5 *
6 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -44,8 +44,10 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
44 if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size)) 44 if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
45 goto clean_up; 45 goto clean_up;
46 46
47 if (gk20a_allocator_init(&p->alloc, unique_name, 0, 47 /* Sacrifice one semaphore in the name of returning error codes. */
48 p->size)) 48 if (gk20a_allocator_init(&p->alloc, unique_name,
49 SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE,
50 SEMAPHORE_SIZE))
49 goto clean_up; 51 goto clean_up;
50 52
51 gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, 53 gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
@@ -163,8 +165,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
163 if (!s) 165 if (!s)
164 return NULL; 166 return NULL;
165 167
166 if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE, 168 s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
167 SEMAPHORE_SIZE)) { 169 if (!s->offset) {
168 gk20a_err(pool->dev, "failed to allocate semaphore"); 170 gk20a_err(pool->dev, "failed to allocate semaphore");
169 kfree(s); 171 kfree(s);
170 return NULL; 172 return NULL;
@@ -186,8 +188,7 @@ static void gk20a_semaphore_free(struct kref *ref)
186 struct gk20a_semaphore *s = 188 struct gk20a_semaphore *s =
187 container_of(ref, struct gk20a_semaphore, ref); 189 container_of(ref, struct gk20a_semaphore, ref);
188 190
189 s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE, 191 gk20a_bfree(&s->pool->alloc, s->offset);
190 SEMAPHORE_SIZE);
191 gk20a_semaphore_pool_put(s->pool); 192 gk20a_semaphore_pool_put(s->pool);
192 kfree(s); 193 kfree(s);
193} 194}
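Context for the semaphore pool change above: because gk20a_balloc() reports failure by returning 0, a pool whose allocator starts at offset 0 could not tell a valid first semaphore from an allocation error, so the allocator is initialised at base SEMAPHORE_SIZE and shortened by one slot ("sacrifice one semaphore"). A self-contained sketch of the idea; SEM_SIZE, sem_alloc() and the 64-byte pool are illustrative, not the driver's values.

#include <stdint.h>
#include <stdio.h>

#define SEM_SIZE 16u

/* Base of the pool is SEM_SIZE, so slot 0 is deliberately never handed out. */
static uint32_t pool_next = SEM_SIZE;

static uint32_t sem_alloc(uint32_t pool_size)
{
	uint32_t off = pool_next;

	if (off + SEM_SIZE > pool_size)
		return 0;   /* unambiguous: 0 can only ever mean failure */
	pool_next += SEM_SIZE;
	return off;
}

int main(void)
{
	uint32_t off = sem_alloc(64);

	if (!off)
		return 1;
	printf("semaphore at offset %u\n", off);
	return 0;
}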
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 9d16dba7..bc904ef3 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -90,9 +90,8 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
90 if (err) 90 if (err)
91 return err; 91 return err;
92 92
93 gk20a_allocator_init(&gr->comp_tags, "comptag", 93 __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
94 1, /* start */ 94 1, max_comptag_lines - 1, 1, 10, 0);
95 max_comptag_lines - 1); /* length*/
96 95
97 gr->comptags_per_cacheline = comptags_per_cacheline; 96 gr->comptags_per_cacheline = comptags_per_cacheline;
98 gr->slices_per_ltc = slices_per_ltc; 97 gr->slices_per_ltc = slices_per_ltc;
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
index 1beac216..211e34b5 100644
--- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -41,9 +41,8 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
41 if (max_comptag_lines < 2) 41 if (max_comptag_lines < 2)
42 return -ENXIO; 42 return -ENXIO;
43 43
44 gk20a_allocator_init(&gr->comp_tags, "comptag", 44 __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
45 1, /* start */ 45 1, max_comptag_lines - 1, 1, 10, 0);
46 max_comptag_lines - 1); /* length*/
47 return 0; 46 return 0;
48} 47}
49 48
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 94e4602f..855aac0d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -243,11 +243,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
243 struct tegra_vgpu_as_share_params *p = &msg.params.as_share; 243 struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
244 struct mm_gk20a *mm = &g->mm; 244 struct mm_gk20a *mm = &g->mm;
245 struct vm_gk20a *vm; 245 struct vm_gk20a *vm;
246 u32 num_small_pages, num_large_pages, low_hole_pages;
247 u64 small_vma_size, large_vma_size; 246 u64 small_vma_size, large_vma_size;
248 char name[32]; 247 char name[32];
249 int err, i; 248 int err, i;
250 u32 start;
251 249
252 /* note: keep the page sizes sorted lowest to highest here */ 250 /* note: keep the page sizes sorted lowest to highest here */
253 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { 251 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -294,33 +292,27 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
294 small_vma_size = (u64)16 << 30; 292 small_vma_size = (u64)16 << 30;
295 large_vma_size = vm->va_limit - small_vma_size; 293 large_vma_size = vm->va_limit - small_vma_size;
296 294
297 num_small_pages = (u32)(small_vma_size >>
298 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
299
300 /* num_pages above is without regard to the low-side hole. */
301 low_hole_pages = (vm->va_start >>
302 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
303
304 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 295 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
305 gmmu_page_sizes[gmmu_page_size_small]>>10); 296 gmmu_page_sizes[gmmu_page_size_small]>>10);
306 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], 297 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
307 name, 298 vm, name,
308 low_hole_pages, /*start*/ 299 vm->va_start,
309 num_small_pages - low_hole_pages);/* length*/ 300 small_vma_size - vm->va_start,
301 SZ_4K,
302 GPU_BALLOC_MAX_ORDER,
303 GPU_BALLOC_GVA_SPACE);
310 if (err) 304 if (err)
311 goto clean_up_share; 305 goto clean_up_share;
312 306
313 start = (u32)(small_vma_size >>
314 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
315 num_large_pages = (u32)(large_vma_size >>
316 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
317
318 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 307 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
319 gmmu_page_sizes[gmmu_page_size_big]>>10); 308 gmmu_page_sizes[gmmu_page_size_big]>>10);
320 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], 309 err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
321 name, 310 vm, name,
322 start, /* start */ 311 small_vma_size,
323 num_large_pages); /* length */ 312 large_vma_size,
313 big_page_size,
314 GPU_BALLOC_MAX_ORDER,
315 GPU_BALLOC_GVA_SPACE);
324 if (err) 316 if (err)
325 goto clean_up_small_allocator; 317 goto clean_up_small_allocator;
326 318