diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/as_gk20a.c | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1167 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 213 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 202 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 68 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 36 |
13 files changed, 1385 insertions, 353 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 63569008..eb18fa65 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -199,21 +199,14 @@ static int gk20a_as_ioctl_get_va_regions( | |||
199 | 199 | ||
200 | for (i = 0; i < write_entries; ++i) { | 200 | for (i = 0; i < write_entries; ++i) { |
201 | struct nvgpu_as_va_region region; | 201 | struct nvgpu_as_va_region region; |
202 | u32 base, limit; | ||
203 | 202 | ||
204 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | 203 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); |
205 | 204 | ||
206 | if (!vm->vma[i].constraint.enable) { | ||
207 | base = vm->vma[i].base; | ||
208 | limit = vm->vma[i].limit; | ||
209 | } else { | ||
210 | base = vm->vma[i].constraint.base; | ||
211 | limit = vm->vma[i].constraint.limit; | ||
212 | } | ||
213 | |||
214 | region.page_size = vm->gmmu_page_sizes[i]; | 205 | region.page_size = vm->gmmu_page_sizes[i]; |
215 | region.offset = (u64)base * region.page_size; | 206 | region.offset = vm->vma[i].base; |
216 | region.pages = limit - base; /* NOTE: limit is exclusive */ | 207 | /* No __aeabi_uldivmod() on some platforms... */ |
208 | region.pages = (vm->vma[i].end - vm->vma[i].start) >> | ||
209 | ilog2(region.page_size); | ||
217 | 210 | ||
218 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) | 211 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) |
219 | return -EFAULT; | 212 | return -EFAULT; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index f3b5544f..2e88726a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include "hw_fb_gk20a.h" | 59 | #include "hw_fb_gk20a.h" |
60 | #include "gk20a_scale.h" | 60 | #include "gk20a_scale.h" |
61 | #include "dbg_gpu_gk20a.h" | 61 | #include "dbg_gpu_gk20a.h" |
62 | #include "gk20a_allocator.h" | ||
62 | #include "hal.h" | 63 | #include "hal.h" |
63 | #include "vgpu/vgpu.h" | 64 | #include "vgpu/vgpu.h" |
64 | 65 | ||
@@ -1532,6 +1533,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1532 | gr_gk20a_debugfs_init(gk20a); | 1533 | gr_gk20a_debugfs_init(gk20a); |
1533 | gk20a_pmu_debugfs_init(dev); | 1534 | gk20a_pmu_debugfs_init(dev); |
1534 | gk20a_cde_debugfs_init(dev); | 1535 | gk20a_cde_debugfs_init(dev); |
1536 | gk20a_alloc_debugfs_init(dev); | ||
1535 | #endif | 1537 | #endif |
1536 | 1538 | ||
1537 | gk20a_init_gr(gk20a); | 1539 | gk20a_init_gr(gk20a); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index 675a98a2..56fb22df 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * gk20a allocator | 2 | * gk20a allocator |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -16,112 +16,1149 @@ | |||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/debugfs.h> | ||
23 | |||
24 | #include "platform_gk20a.h" | ||
19 | #include "gk20a_allocator.h" | 25 | #include "gk20a_allocator.h" |
20 | #include <linux/vmalloc.h> | ||
21 | 26 | ||
22 | /* init allocator struct */ | 27 | #include "mm_gk20a.h" |
23 | int gk20a_allocator_init(struct gk20a_allocator *allocator, | 28 | |
24 | const char *name, u32 start, u32 len) | 29 | static struct dentry *balloc_debugfs_root; |
30 | |||
31 | static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ | ||
32 | |||
33 | static u32 balloc_tracing_on; | ||
34 | |||
35 | #define balloc_trace_func() \ | ||
36 | do { \ | ||
37 | if (balloc_tracing_on) \ | ||
38 | trace_printk("%s\n", __func__); \ | ||
39 | } while (0) | ||
40 | |||
41 | #define balloc_trace_func_done() \ | ||
42 | do { \ | ||
43 | if (balloc_tracing_on) \ | ||
44 | trace_printk("%s_done\n", __func__); \ | ||
45 | } while (0) | ||
46 | |||
47 | |||
48 | static void balloc_init_alloc_debug(struct gk20a_allocator *a); | ||
49 | static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, | ||
50 | int lock); | ||
51 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, | ||
52 | u64 addr); | ||
53 | static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); | ||
54 | static void __balloc_do_free_fixed(struct gk20a_allocator *a, | ||
55 | struct gk20a_fixed_alloc *falloc); | ||
56 | |||
57 | /* | ||
58 | * This function is not present in older kernel's list.h code. | ||
59 | */ | ||
60 | #ifndef list_last_entry | ||
61 | #define list_last_entry(ptr, type, member) \ | ||
62 | list_entry((ptr)->prev, type, member) | ||
63 | #endif | ||
64 | |||
65 | /* | ||
66 | * GPU buddy allocator for various address spaces. | ||
67 | * | ||
68 | * Current limitations: | ||
69 | * o A fixed allocation could potentially be made that borders PDEs with | ||
70 | * different PTE sizes. This would require that fixed buffer to have | ||
71 | * different sized PTEs for different parts of the allocation. Probably | ||
72 | * best to just require PDE alignment for fixed address allocs. | ||
73 | * | ||
74 | * o It is currently possible to make an allocator that has a buddy alignment | ||
75 | * out of sync with the PDE block size alignment. A simple example is a | ||
76 | * 32GB address space starting at byte 1. Every buddy is shifted off by 1 | ||
77 | * which means each buddy corresponf to more than one actual GPU page. The | ||
78 | * best way to fix this is probably just require PDE blocksize alignment | ||
79 | * for the start of the address space. At the moment all allocators are | ||
80 | * easily PDE aligned so this hasn't been a problem. | ||
81 | */ | ||
82 | |||
83 | /* | ||
84 | * Pick a suitable maximum order for this allocator. | ||
85 | * | ||
86 | * Hueristic: Just guessing that the best max order is the largest single | ||
87 | * block that will fit in the address space. | ||
88 | */ | ||
89 | static void balloc_compute_max_order(struct gk20a_allocator *a) | ||
90 | { | ||
91 | u64 true_max_order = ilog2(a->blks); | ||
92 | |||
93 | if (a->max_order > true_max_order) | ||
94 | a->max_order = true_max_order; | ||
95 | if (a->max_order > GPU_BALLOC_MAX_ORDER) | ||
96 | a->max_order = GPU_BALLOC_MAX_ORDER; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Since we can only allocate in chucks of a->blk_size we need to trim off | ||
101 | * any excess data that is not aligned to a->blk_size. | ||
102 | */ | ||
103 | static void balloc_allocator_align(struct gk20a_allocator *a) | ||
104 | { | ||
105 | a->start = ALIGN(a->base, a->blk_size); | ||
106 | a->end = (a->base + a->length) & ~(a->blk_size - 1); | ||
107 | a->count = a->end - a->start; | ||
108 | a->blks = a->count >> a->blk_shift; | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Pass NULL for parent if you want a top level buddy. | ||
113 | */ | ||
114 | static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, | ||
115 | struct gk20a_buddy *parent, | ||
116 | u64 start, u64 order) | ||
117 | { | ||
118 | struct gk20a_buddy *new_buddy; | ||
119 | |||
120 | new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); | ||
121 | if (!new_buddy) | ||
122 | return NULL; | ||
123 | |||
124 | memset(new_buddy, 0, sizeof(struct gk20a_buddy)); | ||
125 | |||
126 | new_buddy->parent = parent; | ||
127 | new_buddy->start = start; | ||
128 | new_buddy->order = order; | ||
129 | new_buddy->end = start + (1 << order) * a->blk_size; | ||
130 | |||
131 | return new_buddy; | ||
132 | } | ||
133 | |||
134 | static void __balloc_buddy_list_add(struct gk20a_allocator *a, | ||
135 | struct gk20a_buddy *b, | ||
136 | struct list_head *list) | ||
137 | { | ||
138 | if (buddy_is_in_list(b)) { | ||
139 | balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", | ||
140 | b->order, b->start); | ||
141 | BUG(); | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Add big PTE blocks to the tail, small to the head for GVA spaces. | ||
146 | * This lets the code that checks if there are available blocks check | ||
147 | * without cycling through the entire list. | ||
148 | */ | ||
149 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
150 | b->pte_size == BALLOC_PTE_SIZE_BIG) | ||
151 | list_add_tail(&b->buddy_entry, list); | ||
152 | else | ||
153 | list_add(&b->buddy_entry, list); | ||
154 | |||
155 | buddy_set_in_list(b); | ||
156 | } | ||
157 | |||
158 | static void __balloc_buddy_list_rem(struct gk20a_allocator *a, | ||
159 | struct gk20a_buddy *b) | ||
160 | { | ||
161 | if (!buddy_is_in_list(b)) { | ||
162 | balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", | ||
163 | b->order, b->start); | ||
164 | BUG(); | ||
165 | } | ||
166 | |||
167 | list_del_init(&b->buddy_entry); | ||
168 | buddy_clr_in_list(b); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Add a buddy to one of the buddy lists and deal with the necessary | ||
173 | * book keeping. Adds the buddy to the list specified by the buddy's order. | ||
174 | */ | ||
175 | static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) | ||
176 | { | ||
177 | __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); | ||
178 | a->buddy_list_len[b->order]++; | ||
179 | } | ||
180 | |||
181 | static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) | ||
182 | { | ||
183 | __balloc_buddy_list_rem(a, b); | ||
184 | a->buddy_list_len[b->order]--; | ||
185 | } | ||
186 | |||
187 | static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) | ||
188 | { | ||
189 | if (len == 0) | ||
190 | return 0; | ||
191 | |||
192 | len--; | ||
193 | len >>= a->blk_shift; | ||
194 | |||
195 | return fls(len); | ||
196 | } | ||
197 | |||
198 | static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) | ||
199 | { | ||
200 | u64 size = (end - start) >> a->blk_shift; | ||
201 | |||
202 | if (size > 0) | ||
203 | return min_t(u64, ilog2(size), a->max_order); | ||
204 | else | ||
205 | return GPU_BALLOC_MAX_ORDER; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Initialize the buddy lists. | ||
210 | */ | ||
211 | static int balloc_init_lists(struct gk20a_allocator *a) | ||
212 | { | ||
213 | int i; | ||
214 | u64 bstart, bend, order; | ||
215 | struct gk20a_buddy *buddy; | ||
216 | |||
217 | bstart = a->start; | ||
218 | bend = a->end; | ||
219 | |||
220 | /* First make sure the LLs are valid. */ | ||
221 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) | ||
222 | INIT_LIST_HEAD(balloc_get_order_list(a, i)); | ||
223 | |||
224 | while (bstart < bend) { | ||
225 | order = __balloc_max_order_in(a, bstart, bend); | ||
226 | |||
227 | buddy = balloc_new_buddy(a, NULL, bstart, order); | ||
228 | if (!buddy) | ||
229 | goto cleanup; | ||
230 | |||
231 | balloc_blist_add(a, buddy); | ||
232 | bstart += balloc_order_to_len(a, order); | ||
233 | } | ||
234 | |||
235 | return 0; | ||
236 | |||
237 | cleanup: | ||
238 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
239 | if (!list_empty(balloc_get_order_list(a, i))) { | ||
240 | buddy = list_first_entry(balloc_get_order_list(a, i), | ||
241 | struct gk20a_buddy, buddy_entry); | ||
242 | balloc_blist_rem(a, buddy); | ||
243 | kmem_cache_free(buddy_cache, buddy); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | return -ENOMEM; | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Initialize a buddy allocator. Returns 0 on success. This allocator does | ||
252 | * not necessarily manage bytes. It manages distinct ranges of resources. This | ||
253 | * allows the allocator to work for things like comp_tags, semaphores, etc. | ||
254 | * | ||
255 | * @allocator: Ptr to an allocator struct to init. | ||
256 | * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to | ||
257 | * get PTE size for GVA spaces. | ||
258 | * @name: Name of the allocator. Doesn't have to be static storage. | ||
259 | * @base: The base address of the resource pool being managed. | ||
260 | * @size: Number of resources in the pool. | ||
261 | * @blk_size: Minimum number of resources to allocate at once. For things like | ||
262 | * semaphores this is 1. For GVA this might be as much as 64k. This | ||
263 | * corresponds to order 0. Must be power of 2. | ||
264 | * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator | ||
265 | * will try and pick a reasonable max order. | ||
266 | * @flags: Extra flags necessary. See GPU_BALLOC_*. | ||
267 | */ | ||
268 | int __gk20a_allocator_init(struct gk20a_allocator *a, | ||
269 | struct vm_gk20a *vm, const char *name, | ||
270 | u64 base, u64 size, u64 blk_size, u64 max_order, | ||
271 | u64 flags) | ||
25 | { | 272 | { |
26 | memset(allocator, 0, sizeof(struct gk20a_allocator)); | 273 | int err; |
274 | |||
275 | memset(a, 0, sizeof(struct gk20a_allocator)); | ||
276 | strncpy(a->name, name, 32); | ||
277 | |||
278 | a->base = base; | ||
279 | a->length = size; | ||
280 | a->blk_size = blk_size; | ||
281 | a->blk_shift = __ffs(blk_size); | ||
282 | |||
283 | /* blk_size must be greater than 0 and a power of 2. */ | ||
284 | if (blk_size == 0) | ||
285 | return -EINVAL; | ||
286 | if (blk_size & (blk_size - 1)) | ||
287 | return -EINVAL; | ||
288 | |||
289 | if (max_order > GPU_BALLOC_MAX_ORDER) | ||
290 | return -EINVAL; | ||
291 | |||
292 | /* If this is to manage a GVA space we need a VM. */ | ||
293 | if (flags & GPU_BALLOC_GVA_SPACE && !vm) | ||
294 | return -EINVAL; | ||
295 | |||
296 | a->vm = vm; | ||
297 | if (flags & GPU_BALLOC_GVA_SPACE) | ||
298 | a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); | ||
27 | 299 | ||
28 | strncpy(allocator->name, name, 32); | 300 | a->flags = flags; |
301 | a->max_order = max_order; | ||
29 | 302 | ||
30 | allocator->base = start; | 303 | balloc_allocator_align(a); |
31 | allocator->limit = start + len - 1; | 304 | balloc_compute_max_order(a); |
32 | 305 | ||
33 | allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long)); | 306 | /* Shared buddy kmem_cache for all allocators. */ |
34 | if (!allocator->bitmap) | 307 | if (!buddy_cache) |
308 | buddy_cache = KMEM_CACHE(gk20a_buddy, 0); | ||
309 | if (!buddy_cache) | ||
35 | return -ENOMEM; | 310 | return -ENOMEM; |
36 | 311 | ||
37 | allocator_dbg(allocator, "%s : base %d, limit %d", | 312 | a->alloced_buddies = RB_ROOT; |
38 | allocator->name, allocator->base, allocator->limit); | 313 | err = balloc_init_lists(a); |
314 | if (err) | ||
315 | return err; | ||
39 | 316 | ||
40 | init_rwsem(&allocator->rw_sema); | 317 | mutex_init(&a->lock); |
41 | 318 | ||
42 | allocator->alloc = gk20a_allocator_block_alloc; | 319 | a->init = 1; |
43 | allocator->free = gk20a_allocator_block_free; | 320 | |
321 | balloc_init_alloc_debug(a); | ||
322 | balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); | ||
323 | balloc_dbg(a, " size 0x%llx\n", a->length); | ||
324 | balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); | ||
325 | balloc_dbg(a, " max_order %llu\n", a->max_order); | ||
326 | balloc_dbg(a, " flags 0x%llx\n", a->flags); | ||
44 | 327 | ||
45 | return 0; | 328 | return 0; |
46 | } | 329 | } |
47 | 330 | ||
48 | /* destroy allocator, free all remaining blocks if any */ | 331 | int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, |
49 | void gk20a_allocator_destroy(struct gk20a_allocator *allocator) | 332 | u64 base, u64 size, u64 blk_size) |
333 | { | ||
334 | return __gk20a_allocator_init(a, NULL, name, | ||
335 | base, size, blk_size, 0, 0); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Clean up and destroy the passed allocator. | ||
340 | */ | ||
341 | void gk20a_allocator_destroy(struct gk20a_allocator *a) | ||
50 | { | 342 | { |
51 | down_write(&allocator->rw_sema); | 343 | struct rb_node *node; |
344 | struct gk20a_buddy *bud; | ||
345 | struct gk20a_fixed_alloc *falloc; | ||
346 | int i; | ||
347 | |||
348 | balloc_lock(a); | ||
349 | |||
350 | if (!IS_ERR_OR_NULL(a->debugfs_entry)) | ||
351 | debugfs_remove(a->debugfs_entry); | ||
352 | |||
353 | /* | ||
354 | * Free the fixed allocs first. | ||
355 | */ | ||
356 | while ((node = rb_first(&a->fixed_allocs)) != NULL) { | ||
357 | falloc = container_of(node, | ||
358 | struct gk20a_fixed_alloc, alloced_entry); | ||
359 | |||
360 | __balloc_do_free_fixed(a, falloc); | ||
361 | rb_erase(node, &a->fixed_allocs); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * And now free all outstanding allocations. | ||
366 | */ | ||
367 | while ((node = rb_first(&a->alloced_buddies)) != NULL) { | ||
368 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
369 | balloc_free_buddy(a, bud->start); | ||
370 | balloc_blist_add(a, bud); | ||
371 | balloc_coalesce(a, bud); | ||
372 | } | ||
52 | 373 | ||
53 | vfree(allocator->bitmap); | 374 | /* |
375 | * Now clean up the unallocated buddies. | ||
376 | */ | ||
377 | for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { | ||
378 | BUG_ON(a->buddy_list_alloced[i] != 0); | ||
379 | |||
380 | while (!list_empty(balloc_get_order_list(a, i))) { | ||
381 | bud = list_first_entry(balloc_get_order_list(a, i), | ||
382 | struct gk20a_buddy, buddy_entry); | ||
383 | balloc_blist_rem(a, bud); | ||
384 | kmem_cache_free(buddy_cache, bud); | ||
385 | } | ||
386 | |||
387 | if (a->buddy_list_len[i] != 0) { | ||
388 | pr_info("Excess buddies!!! (%d: %llu)\n", | ||
389 | i, a->buddy_list_len[i]); | ||
390 | BUG(); | ||
391 | } | ||
392 | if (a->buddy_list_split[i] != 0) { | ||
393 | pr_info("Excess split nodes!!! (%d: %llu)\n", | ||
394 | i, a->buddy_list_split[i]); | ||
395 | BUG(); | ||
396 | } | ||
397 | if (a->buddy_list_alloced[i] != 0) { | ||
398 | pr_info("Excess alloced nodes!!! (%d: %llu)\n", | ||
399 | i, a->buddy_list_alloced[i]); | ||
400 | BUG(); | ||
401 | } | ||
402 | } | ||
54 | 403 | ||
55 | memset(allocator, 0, sizeof(struct gk20a_allocator)); | 404 | a->init = 0; |
405 | |||
406 | balloc_unlock(a); | ||
407 | |||
408 | /* | ||
409 | * We cant unlock an allocator after memsetting it. That wipes the | ||
410 | * state of the mutex. Hopefully no one uses the allocator after | ||
411 | * destroying it... | ||
412 | */ | ||
413 | memset(a, 0, sizeof(struct gk20a_allocator)); | ||
56 | } | 414 | } |
57 | 415 | ||
58 | /* | 416 | /* |
59 | * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is | 417 | * Combine the passed buddy if possible. The pointer in @b may not be valid |
60 | * returned to caller in *addr. | 418 | * after this as the buddy may be freed. |
61 | * | 419 | * |
62 | * contiguous allocation, which allocates one block of | 420 | * @a must be locked. |
63 | * contiguous address. | 421 | */ |
64 | */ | 422 | static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) |
65 | int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, | ||
66 | u32 *addr, u32 len, u32 align) | ||
67 | { | 423 | { |
68 | unsigned long _addr; | 424 | struct gk20a_buddy *parent; |
69 | 425 | ||
70 | allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); | 426 | if (buddy_is_alloced(b) || buddy_is_split(b)) |
427 | return; | ||
71 | 428 | ||
72 | if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ | 429 | /* |
73 | *addr + len > allocator->limit || /* check addr range */ | 430 | * If both our buddy and I are both not allocated and not split then |
74 | *addr & (align - 1) || /* check addr alignment */ | 431 | * we can coalesce ourselves. |
75 | len == 0) /* check len */ | 432 | */ |
76 | return -EINVAL; | 433 | if (!b->buddy) |
434 | return; | ||
435 | if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) | ||
436 | return; | ||
437 | |||
438 | parent = b->parent; | ||
439 | |||
440 | balloc_blist_rem(a, b); | ||
441 | balloc_blist_rem(a, b->buddy); | ||
442 | |||
443 | buddy_clr_split(parent); | ||
444 | a->buddy_list_split[parent->order]--; | ||
445 | balloc_blist_add(a, parent); | ||
446 | |||
447 | /* | ||
448 | * Recursively coalesce as far as we can go. | ||
449 | */ | ||
450 | balloc_coalesce(a, parent); | ||
451 | |||
452 | /* Clean up the remains. */ | ||
453 | kmem_cache_free(buddy_cache, b->buddy); | ||
454 | kmem_cache_free(buddy_cache, b); | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. | ||
459 | * | ||
460 | * @a must be locked. | ||
461 | */ | ||
462 | static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, | ||
463 | int pte_size) | ||
464 | { | ||
465 | struct gk20a_buddy *left, *right; | ||
466 | u64 half; | ||
77 | 467 | ||
78 | len = ALIGN(len, align); | 468 | left = balloc_new_buddy(a, b, b->start, b->order - 1); |
79 | if (!len) | 469 | if (!left) |
80 | return -ENOMEM; | 470 | return -ENOMEM; |
81 | 471 | ||
82 | down_write(&allocator->rw_sema); | 472 | half = (b->end - b->start) / 2; |
83 | 473 | ||
84 | _addr = bitmap_find_next_zero_area(allocator->bitmap, | 474 | right = balloc_new_buddy(a, b, b->start + half, b->order - 1); |
85 | allocator->limit - allocator->base + 1, | 475 | if (!right) { |
86 | *addr ? (*addr - allocator->base) : 0, | 476 | kmem_cache_free(buddy_cache, left); |
87 | len, | ||
88 | align - 1); | ||
89 | if ((_addr > allocator->limit - allocator->base + 1) || | ||
90 | (*addr && *addr != (_addr + allocator->base))) { | ||
91 | up_write(&allocator->rw_sema); | ||
92 | return -ENOMEM; | 477 | return -ENOMEM; |
93 | } | 478 | } |
94 | 479 | ||
95 | bitmap_set(allocator->bitmap, _addr, len); | 480 | buddy_set_split(b); |
96 | *addr = allocator->base + _addr; | 481 | a->buddy_list_split[b->order]++; |
97 | 482 | ||
98 | up_write(&allocator->rw_sema); | 483 | b->left = left; |
484 | b->right = right; | ||
485 | left->buddy = right; | ||
486 | right->buddy = left; | ||
487 | left->parent = b; | ||
488 | right->parent = b; | ||
99 | 489 | ||
100 | allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); | 490 | /* PTE considerations. */ |
491 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
492 | left->order <= a->pte_blk_order) { | ||
493 | left->pte_size = pte_size; | ||
494 | right->pte_size = pte_size; | ||
495 | } | ||
496 | |||
497 | balloc_blist_rem(a, b); | ||
498 | balloc_blist_add(a, left); | ||
499 | balloc_blist_add(a, right); | ||
101 | 500 | ||
102 | return 0; | 501 | return 0; |
103 | } | 502 | } |
104 | 503 | ||
105 | /* free all blocks between start and end */ | 504 | /* |
106 | int gk20a_allocator_block_free(struct gk20a_allocator *allocator, | 505 | * Place the passed buddy into the RB tree for allocated buddies. Never fails |
107 | u32 addr, u32 len, u32 align) | 506 | * unless the passed entry is a duplicate which is a bug. |
507 | * | ||
508 | * @a must be locked. | ||
509 | */ | ||
510 | void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) | ||
108 | { | 511 | { |
109 | allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); | 512 | struct rb_node **new = &(a->alloced_buddies.rb_node); |
513 | struct rb_node *parent = NULL; | ||
110 | 514 | ||
111 | if (addr + len > allocator->limit || /* check addr range */ | 515 | while (*new) { |
112 | addr < allocator->base || | 516 | struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, |
113 | addr & (align - 1)) /* check addr alignment */ | 517 | alloced_entry); |
114 | return -EINVAL; | ||
115 | 518 | ||
116 | len = ALIGN(len, align); | 519 | parent = *new; |
117 | if (!len) | 520 | if (b->start < bud->start) |
118 | return -EINVAL; | 521 | new = &((*new)->rb_left); |
522 | else if (b->start > bud->start) | ||
523 | new = &((*new)->rb_right); | ||
524 | else | ||
525 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
526 | } | ||
527 | |||
528 | rb_link_node(&b->alloced_entry, parent, new); | ||
529 | rb_insert_color(&b->alloced_entry, &a->alloced_buddies); | ||
530 | |||
531 | buddy_set_alloced(b); | ||
532 | a->buddy_list_alloced[b->order]++; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
537 | * deallocated buddy for further processing. | ||
538 | * | ||
539 | * @a must be locked. | ||
540 | */ | ||
541 | static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, | ||
542 | u64 addr) | ||
543 | { | ||
544 | struct rb_node *node = a->alloced_buddies.rb_node; | ||
545 | struct gk20a_buddy *bud; | ||
546 | |||
547 | while (node) { | ||
548 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
549 | |||
550 | if (addr < bud->start) | ||
551 | node = node->rb_left; | ||
552 | else if (addr > bud->start) | ||
553 | node = node->rb_right; | ||
554 | else | ||
555 | break; | ||
556 | } | ||
557 | |||
558 | if (!node) | ||
559 | return NULL; | ||
560 | |||
561 | rb_erase(node, &a->alloced_buddies); | ||
562 | buddy_clr_alloced(bud); | ||
563 | a->buddy_list_alloced[bud->order]--; | ||
564 | |||
565 | return bud; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Find a suitable buddy for the given order and PTE type (big or little). | ||
570 | */ | ||
571 | static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, | ||
572 | u64 order, int pte_size) | ||
573 | { | ||
574 | struct gk20a_buddy *bud; | ||
575 | |||
576 | if (list_empty(balloc_get_order_list(a, order))) | ||
577 | return NULL; | ||
578 | |||
579 | if (a->flags & GPU_BALLOC_GVA_SPACE && | ||
580 | pte_size == BALLOC_PTE_SIZE_BIG) | ||
581 | bud = list_last_entry(balloc_get_order_list(a, order), | ||
582 | struct gk20a_buddy, buddy_entry); | ||
583 | else | ||
584 | bud = list_first_entry(balloc_get_order_list(a, order), | ||
585 | struct gk20a_buddy, buddy_entry); | ||
586 | |||
587 | if (bud->pte_size != BALLOC_PTE_SIZE_ANY && | ||
588 | bud->pte_size != pte_size) | ||
589 | return NULL; | ||
590 | |||
591 | return bud; | ||
592 | } | ||
593 | |||
594 | /* | ||
595 | * Allocate a suitably sized buddy. If no suitable buddy exists split higher | ||
596 | * order buddies until we have a suitable buddy to allocate. | ||
597 | * | ||
598 | * For PDE grouping add an extra check to see if a buddy is suitable: that the | ||
599 | * buddy exists in a PDE who's PTE size is reasonable | ||
600 | * | ||
601 | * @a must be locked. | ||
602 | */ | ||
603 | static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) | ||
604 | { | ||
605 | u64 split_order; | ||
606 | struct gk20a_buddy *bud; | ||
607 | |||
608 | split_order = order; | ||
609 | while (!(bud = __balloc_find_buddy(a, split_order, pte_size))) | ||
610 | split_order++; | ||
611 | |||
612 | while (bud->order != order) { | ||
613 | if (balloc_split_buddy(a, bud, pte_size)) | ||
614 | return 0; /* No mem... */ | ||
615 | bud = bud->left; | ||
616 | } | ||
617 | |||
618 | balloc_blist_rem(a, bud); | ||
619 | balloc_alloc_buddy(a, bud); | ||
119 | 620 | ||
120 | down_write(&allocator->rw_sema); | 621 | return bud->start; |
121 | bitmap_clear(allocator->bitmap, addr - allocator->base, len); | 622 | } |
122 | up_write(&allocator->rw_sema); | 623 | |
624 | /* | ||
625 | * Allocate memory from the passed allocator. | ||
626 | */ | ||
627 | u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) | ||
628 | { | ||
629 | u64 order, addr; | ||
630 | int pte_size; | ||
631 | |||
632 | balloc_trace_func(); | ||
633 | |||
634 | balloc_lock(a); | ||
635 | |||
636 | order = balloc_get_order(a, len); | ||
637 | |||
638 | if (order > a->max_order) { | ||
639 | balloc_unlock(a); | ||
640 | balloc_dbg(a, "Alloc fail\n"); | ||
641 | balloc_trace_func_done(); | ||
642 | return 0; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * For now pass the base address of the allocator's region to | ||
647 | * __get_pte_size(). This ensures we get the right page size for | ||
648 | * the alloc but we don't have to know what the real address is | ||
649 | * going to be quite yet. | ||
650 | * | ||
651 | * TODO: once userspace supports a unified address space pass 0 for | ||
652 | * the base. This will make only 'len' affect the PTE size. | ||
653 | */ | ||
654 | if (a->flags & GPU_BALLOC_GVA_SPACE) | ||
655 | pte_size = __get_pte_size(a->vm, a->base, len); | ||
656 | else | ||
657 | pte_size = BALLOC_PTE_SIZE_ANY; | ||
658 | |||
659 | addr = __balloc_do_alloc(a, order, pte_size); | ||
660 | |||
661 | a->bytes_alloced += len; | ||
662 | a->bytes_alloced_real += balloc_order_to_len(a, order); | ||
663 | |||
664 | balloc_unlock(a); | ||
665 | balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", | ||
666 | addr, order, len, | ||
667 | pte_size == gmmu_page_size_big ? "big" : | ||
668 | pte_size == gmmu_page_size_small ? "small" : | ||
669 | "NA/any"); | ||
670 | |||
671 | balloc_trace_func_done(); | ||
672 | return addr; | ||
673 | } | ||
674 | |||
675 | /* | ||
676 | * See if the passed range is actually available for allocation. If so, then | ||
677 | * return 1, otherwise return 0. | ||
678 | * | ||
679 | * TODO: Right now this uses the unoptimal approach of going through all | ||
680 | * outstanding allocations and checking their base/ends. This could be better. | ||
681 | */ | ||
682 | static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) | ||
683 | { | ||
684 | struct rb_node *node; | ||
685 | struct gk20a_buddy *bud; | ||
686 | |||
687 | node = rb_first(&a->alloced_buddies); | ||
688 | if (!node) | ||
689 | return 1; /* No allocs yet. */ | ||
690 | |||
691 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
692 | |||
693 | while (bud->start < end) { | ||
694 | if ((bud->start > base && bud->start < end) || | ||
695 | (bud->end > base && bud->end < end)) | ||
696 | return 0; | ||
697 | |||
698 | node = rb_next(node); | ||
699 | if (!node) | ||
700 | break; | ||
701 | bud = container_of(node, struct gk20a_buddy, alloced_entry); | ||
702 | } | ||
703 | |||
704 | return 1; | ||
705 | } | ||
706 | |||
707 | static void balloc_alloc_fixed(struct gk20a_allocator *a, | ||
708 | struct gk20a_fixed_alloc *f) | ||
709 | { | ||
710 | struct rb_node **new = &(a->fixed_allocs.rb_node); | ||
711 | struct rb_node *parent = NULL; | ||
712 | |||
713 | while (*new) { | ||
714 | struct gk20a_fixed_alloc *falloc = | ||
715 | container_of(*new, struct gk20a_fixed_alloc, | ||
716 | alloced_entry); | ||
717 | |||
718 | parent = *new; | ||
719 | if (f->start < falloc->start) | ||
720 | new = &((*new)->rb_left); | ||
721 | else if (f->start > falloc->start) | ||
722 | new = &((*new)->rb_right); | ||
723 | else | ||
724 | BUG_ON("Duplicate entries in allocated list!\n"); | ||
725 | } | ||
726 | |||
727 | rb_link_node(&f->alloced_entry, parent, new); | ||
728 | rb_insert_color(&f->alloced_entry, &a->fixed_allocs); | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * Remove the passed buddy from the allocated buddy RB tree. Returns the | ||
733 | * deallocated buddy for further processing. | ||
734 | * | ||
735 | * @a must be locked. | ||
736 | */ | ||
737 | static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, | ||
738 | u64 addr) | ||
739 | { | ||
740 | struct rb_node *node = a->fixed_allocs.rb_node; | ||
741 | struct gk20a_fixed_alloc *falloc; | ||
742 | |||
743 | while (node) { | ||
744 | falloc = container_of(node, | ||
745 | struct gk20a_fixed_alloc, alloced_entry); | ||
746 | |||
747 | if (addr < falloc->start) | ||
748 | node = node->rb_left; | ||
749 | else if (addr > falloc->start) | ||
750 | node = node->rb_right; | ||
751 | else | ||
752 | break; | ||
753 | } | ||
754 | |||
755 | if (!node) | ||
756 | return NULL; | ||
757 | |||
758 | rb_erase(node, &a->fixed_allocs); | ||
759 | |||
760 | return falloc; | ||
761 | } | ||
762 | |||
763 | /* | ||
764 | * Find the parent range - doesn't necessarily need the parent to actually exist | ||
765 | * as a buddy. Finding an existing parent comes later... | ||
766 | */ | ||
767 | static void __balloc_get_parent_range(struct gk20a_allocator *a, | ||
768 | u64 base, u64 order, | ||
769 | u64 *pbase, u64 *porder) | ||
770 | { | ||
771 | u64 base_mask; | ||
772 | u64 shifted_base = balloc_base_shift(a, base); | ||
773 | |||
774 | order++; | ||
775 | base_mask = ~((a->blk_size << order) - 1); | ||
776 | |||
777 | shifted_base &= base_mask; | ||
778 | |||
779 | *pbase = balloc_base_unshift(a, shifted_base); | ||
780 | *porder = order; | ||
781 | } | ||
782 | |||
783 | /* | ||
784 | * Makes a buddy at the passed address. This will make all parent buddies | ||
785 | * necessary for this buddy to exist as well. | ||
786 | */ | ||
787 | static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, | ||
788 | u64 base, u64 order) | ||
789 | { | ||
790 | struct gk20a_buddy *bud = NULL; | ||
791 | struct list_head *order_list; | ||
792 | u64 cur_order = order, cur_base = base; | ||
793 | |||
794 | /* | ||
795 | * Algo: | ||
796 | * 1. Keep jumping up a buddy order until we find the real buddy that | ||
797 | * this buddy exists in. | ||
798 | * 2. Then work our way down through the buddy tree until we hit a dead | ||
799 | * end. | ||
800 | * 3. Start splitting buddies until we split to the one we need to | ||
801 | * make. | ||
802 | */ | ||
803 | while (cur_order <= a->max_order) { | ||
804 | int found = 0; | ||
805 | |||
806 | order_list = balloc_get_order_list(a, cur_order); | ||
807 | list_for_each_entry(bud, order_list, buddy_entry) { | ||
808 | if (bud->start == cur_base) { | ||
809 | found = 1; | ||
810 | break; | ||
811 | } | ||
812 | } | ||
813 | |||
814 | if (found) | ||
815 | break; | ||
816 | |||
817 | __balloc_get_parent_range(a, cur_base, cur_order, | ||
818 | &cur_base, &cur_order); | ||
819 | } | ||
820 | |||
821 | if (cur_order > a->max_order) { | ||
822 | balloc_dbg(a, "No buddy for range ???\n"); | ||
823 | return NULL; | ||
824 | } | ||
825 | |||
826 | /* Split this buddy as necessary until we get the target buddy. */ | ||
827 | while (bud->start != base || bud->order != order) { | ||
828 | if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { | ||
829 | balloc_coalesce(a, bud); | ||
830 | return NULL; | ||
831 | } | ||
832 | |||
833 | if (base < bud->right->start) | ||
834 | bud = bud->left; | ||
835 | else | ||
836 | bud = bud->right; | ||
837 | |||
838 | } | ||
839 | |||
840 | return bud; | ||
841 | } | ||
842 | |||
843 | static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, | ||
844 | struct gk20a_fixed_alloc *falloc, | ||
845 | u64 base, u64 len) | ||
846 | { | ||
847 | u64 shifted_base, inc_base; | ||
848 | u64 align_order; | ||
849 | |||
850 | shifted_base = balloc_base_shift(a, base); | ||
851 | if (shifted_base == 0) | ||
852 | align_order = __fls(len >> a->blk_shift); | ||
853 | else | ||
854 | align_order = min_t(u64, | ||
855 | __ffs(shifted_base >> a->blk_shift), | ||
856 | __fls(len >> a->blk_shift)); | ||
857 | |||
858 | if (align_order > a->max_order) { | ||
859 | balloc_dbg(a, "Align order too big: %llu > %llu\n", | ||
860 | align_order, a->max_order); | ||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | /* | ||
865 | * Generate a list of buddies that satisfy this allocation. | ||
866 | */ | ||
867 | inc_base = shifted_base; | ||
868 | while (inc_base < (shifted_base + len)) { | ||
869 | u64 order_len = balloc_order_to_len(a, align_order); | ||
870 | u64 remaining; | ||
871 | struct gk20a_buddy *bud; | ||
872 | |||
873 | bud = __balloc_make_fixed_buddy(a, | ||
874 | balloc_base_unshift(a, inc_base), | ||
875 | align_order); | ||
876 | if (!bud) { | ||
877 | balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", | ||
878 | balloc_base_unshift(a, inc_base), | ||
879 | align_order); | ||
880 | goto err_and_cleanup; | ||
881 | } | ||
882 | |||
883 | balloc_blist_rem(a, bud); | ||
884 | balloc_alloc_buddy(a, bud); | ||
885 | __balloc_buddy_list_add(a, bud, &falloc->buddies); | ||
886 | |||
887 | /* Book keeping. */ | ||
888 | inc_base += order_len; | ||
889 | remaining = (shifted_base + len) - inc_base; | ||
890 | align_order = __ffs(inc_base >> a->blk_shift); | ||
891 | |||
892 | /* If we don't have much left - trim down align_order. */ | ||
893 | if (balloc_order_to_len(a, align_order) > remaining) | ||
894 | align_order = __balloc_max_order_in(a, inc_base, | ||
895 | inc_base + remaining); | ||
896 | } | ||
897 | |||
898 | return base; | ||
123 | 899 | ||
124 | allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); | 900 | err_and_cleanup: |
901 | while (!list_empty(&falloc->buddies)) { | ||
902 | struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, | ||
903 | struct gk20a_buddy, | ||
904 | buddy_entry); | ||
905 | |||
906 | __balloc_buddy_list_rem(a, bud); | ||
907 | balloc_free_buddy(a, bud->start); | ||
908 | kmem_cache_free(buddy_cache, bud); | ||
909 | } | ||
910 | |||
911 | return 0; | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Allocate a fixed address allocation. The address of the allocation is @base | ||
916 | * and the length is @len. This is not a typical buddy allocator operation and | ||
917 | * as such has a high possibility of failure if the address space is heavily in | ||
918 | * use. | ||
919 | * | ||
920 | * Please do not use this function unless _absolutely_ necessary. | ||
921 | */ | ||
922 | u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) | ||
923 | { | ||
924 | struct gk20a_fixed_alloc *falloc = NULL; | ||
925 | struct gk20a_buddy *bud; | ||
926 | u64 ret, real_bytes = 0; | ||
927 | |||
928 | balloc_trace_func(); | ||
929 | |||
930 | /* If base isn't aligned to an order 0 block, fail. */ | ||
931 | if (base & (a->blk_size - 1)) | ||
932 | goto fail; | ||
933 | |||
934 | if (len == 0) | ||
935 | goto fail; | ||
936 | |||
937 | falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); | ||
938 | if (!falloc) | ||
939 | goto fail; | ||
940 | |||
941 | INIT_LIST_HEAD(&falloc->buddies); | ||
942 | falloc->start = base; | ||
943 | falloc->end = base + len; | ||
944 | |||
945 | balloc_lock(a); | ||
946 | if (!balloc_is_range_free(a, base, base + len)) { | ||
947 | balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", | ||
948 | base, base + len); | ||
949 | goto fail_unlock; | ||
950 | } | ||
951 | |||
952 | ret = __balloc_do_alloc_fixed(a, falloc, base, len); | ||
953 | if (!ret) { | ||
954 | balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", | ||
955 | base, base + len); | ||
956 | goto fail_unlock; | ||
957 | } | ||
958 | |||
959 | balloc_alloc_fixed(a, falloc); | ||
960 | |||
961 | list_for_each_entry(bud, &falloc->buddies, buddy_entry) | ||
962 | real_bytes += (bud->end - bud->start); | ||
963 | |||
964 | a->bytes_alloced += len; | ||
965 | a->bytes_alloced_real += real_bytes; | ||
966 | |||
967 | balloc_unlock(a); | ||
968 | balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); | ||
969 | |||
970 | balloc_trace_func_done(); | ||
971 | return base; | ||
972 | |||
973 | fail_unlock: | ||
974 | balloc_unlock(a); | ||
975 | fail: | ||
976 | kfree(falloc); | ||
977 | balloc_trace_func_done(); | ||
978 | return 0; | ||
979 | } | ||
980 | |||
981 | static void __balloc_do_free_fixed(struct gk20a_allocator *a, | ||
982 | struct gk20a_fixed_alloc *falloc) | ||
983 | { | ||
984 | struct gk20a_buddy *bud; | ||
985 | |||
986 | while (!list_empty(&falloc->buddies)) { | ||
987 | bud = list_first_entry(&falloc->buddies, | ||
988 | struct gk20a_buddy, | ||
989 | buddy_entry); | ||
990 | __balloc_buddy_list_rem(a, bud); | ||
991 | |||
992 | balloc_free_buddy(a, bud->start); | ||
993 | balloc_blist_add(a, bud); | ||
994 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
995 | |||
996 | /* | ||
997 | * Attempt to defrag the allocation. | ||
998 | */ | ||
999 | balloc_coalesce(a, bud); | ||
1000 | } | ||
1001 | |||
1002 | kfree(falloc); | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * Free the passed allocation. | ||
1007 | */ | ||
1008 | void gk20a_bfree(struct gk20a_allocator *a, u64 addr) | ||
1009 | { | ||
1010 | struct gk20a_buddy *bud; | ||
1011 | struct gk20a_fixed_alloc *falloc; | ||
1012 | |||
1013 | balloc_trace_func(); | ||
1014 | |||
1015 | if (!addr) { | ||
1016 | balloc_trace_func_done(); | ||
1017 | return; | ||
1018 | } | ||
1019 | |||
1020 | balloc_lock(a); | ||
1021 | |||
1022 | /* | ||
1023 | * First see if this is a fixed alloc. If not fall back to a regular | ||
1024 | * buddy. | ||
1025 | */ | ||
1026 | falloc = balloc_free_fixed(a, addr); | ||
1027 | if (falloc) { | ||
1028 | __balloc_do_free_fixed(a, falloc); | ||
1029 | goto done; | ||
1030 | } | ||
1031 | |||
1032 | bud = balloc_free_buddy(a, addr); | ||
1033 | if (!bud) | ||
1034 | goto done; | ||
1035 | |||
1036 | balloc_blist_add(a, bud); | ||
1037 | a->bytes_freed += balloc_order_to_len(a, bud->order); | ||
1038 | |||
1039 | /* | ||
1040 | * Attempt to defrag the allocation. | ||
1041 | */ | ||
1042 | balloc_coalesce(a, bud); | ||
1043 | |||
1044 | done: | ||
1045 | balloc_unlock(a); | ||
1046 | balloc_dbg(a, "Free 0x%llx\n", addr); | ||
1047 | balloc_trace_func_done(); | ||
1048 | return; | ||
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | * Print the buddy allocator top level stats. If you pass @s as NULL then the | ||
1053 | * stats are printed to the kernel log. This lets this code be used for | ||
1054 | * debugging purposes internal to the allocator. | ||
1055 | */ | ||
1056 | static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, | ||
1057 | int lock) | ||
1058 | { | ||
1059 | #define __balloc_pstat(s, fmt, arg...) \ | ||
1060 | do { \ | ||
1061 | if (s) \ | ||
1062 | seq_printf(s, fmt, ##arg); \ | ||
1063 | else \ | ||
1064 | balloc_dbg(a, fmt, ##arg); \ | ||
1065 | } while (0) | ||
1066 | |||
1067 | int i; | ||
1068 | struct rb_node *node; | ||
1069 | struct gk20a_fixed_alloc *falloc; | ||
1070 | |||
1071 | __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", | ||
1072 | a->base, a->length, a->blk_size); | ||
1073 | __balloc_pstat(s, "Internal params:\n"); | ||
1074 | __balloc_pstat(s, " start = %llu\n", a->start); | ||
1075 | __balloc_pstat(s, " end = %llu\n", a->end); | ||
1076 | __balloc_pstat(s, " count = %llu\n", a->count); | ||
1077 | __balloc_pstat(s, " blks = %llu\n", a->blks); | ||
1078 | __balloc_pstat(s, " max_order = %llu\n", a->max_order); | ||
1079 | |||
1080 | __balloc_pstat(s, "Buddy blocks:\n"); | ||
1081 | __balloc_pstat(s, " Order Free Alloced Split\n"); | ||
1082 | __balloc_pstat(s, " ----- ---- ------- -----\n"); | ||
1083 | |||
1084 | if (lock) | ||
1085 | balloc_lock(a); | ||
1086 | for (i = a->max_order; i >= 0; i--) { | ||
1087 | if (a->buddy_list_len[i] == 0 && | ||
1088 | a->buddy_list_alloced[i] == 0 && | ||
1089 | a->buddy_list_split[i] == 0) | ||
1090 | continue; | ||
1091 | |||
1092 | __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, | ||
1093 | a->buddy_list_len[i], | ||
1094 | a->buddy_list_alloced[i], | ||
1095 | a->buddy_list_split[i]); | ||
1096 | } | ||
1097 | |||
1098 | __balloc_pstat(s, "\n"); | ||
1099 | |||
1100 | for (node = rb_first(&a->fixed_allocs), i = 1; | ||
1101 | node != NULL; | ||
1102 | node = rb_next(node)) { | ||
1103 | falloc = container_of(node, | ||
1104 | struct gk20a_fixed_alloc, alloced_entry); | ||
1105 | |||
1106 | __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", | ||
1107 | i, falloc->start, falloc->end); | ||
1108 | } | ||
1109 | |||
1110 | __balloc_pstat(s, "\n"); | ||
1111 | __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); | ||
1112 | __balloc_pstat(s, "Bytes allocated (real): %llu\n", | ||
1113 | a->bytes_alloced_real); | ||
1114 | __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); | ||
1115 | |||
1116 | if (lock) | ||
1117 | balloc_unlock(a); | ||
1118 | |||
1119 | #undef __balloc_pstats | ||
1120 | } | ||
1121 | |||
1122 | static int __alloc_show(struct seq_file *s, void *unused) | ||
1123 | { | ||
1124 | struct gk20a_allocator *a = s->private; | ||
1125 | |||
1126 | balloc_print_stats(a, s, 1); | ||
125 | 1127 | ||
126 | return 0; | 1128 | return 0; |
127 | } | 1129 | } |
1130 | |||
1131 | static int __alloc_open(struct inode *inode, struct file *file) | ||
1132 | { | ||
1133 | return single_open(file, __alloc_show, inode->i_private); | ||
1134 | } | ||
1135 | |||
1136 | static const struct file_operations __alloc_fops = { | ||
1137 | .open = __alloc_open, | ||
1138 | .read = seq_read, | ||
1139 | .llseek = seq_lseek, | ||
1140 | .release = single_release, | ||
1141 | }; | ||
1142 | |||
1143 | static void balloc_init_alloc_debug(struct gk20a_allocator *a) | ||
1144 | { | ||
1145 | if (!balloc_debugfs_root) | ||
1146 | return; | ||
1147 | |||
1148 | a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, | ||
1149 | balloc_debugfs_root, | ||
1150 | a, &__alloc_fops); | ||
1151 | } | ||
1152 | |||
1153 | void gk20a_alloc_debugfs_init(struct platform_device *pdev) | ||
1154 | { | ||
1155 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
1156 | struct dentry *gpu_root = platform->debugfs; | ||
1157 | |||
1158 | balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); | ||
1159 | if (IS_ERR_OR_NULL(balloc_debugfs_root)) | ||
1160 | return; | ||
1161 | |||
1162 | debugfs_create_u32("tracing", 0664, balloc_debugfs_root, | ||
1163 | &balloc_tracing_on); | ||
1164 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 69a227bd..e86e053b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -17,75 +17,190 @@ | |||
17 | #ifndef GK20A_ALLOCATOR_H | 17 | #ifndef GK20A_ALLOCATOR_H |
18 | #define GK20A_ALLOCATOR_H | 18 | #define GK20A_ALLOCATOR_H |
19 | 19 | ||
20 | #include <linux/list.h> | ||
20 | #include <linux/rbtree.h> | 21 | #include <linux/rbtree.h> |
21 | #include <linux/rwsem.h> | 22 | #include <linux/debugfs.h> |
22 | #include <linux/slab.h> | 23 | #include <linux/platform_device.h> |
23 | 24 | ||
24 | /* #define ALLOCATOR_DEBUG */ | 25 | /* #define ALLOCATOR_DEBUG */ |
25 | 26 | ||
26 | /* main struct */ | 27 | /* |
28 | * Each buddy is an element in a binary tree. | ||
29 | */ | ||
30 | struct gk20a_buddy { | ||
31 | struct gk20a_buddy *parent; /* Parent node. */ | ||
32 | struct gk20a_buddy *buddy; /* This node's buddy. */ | ||
33 | struct gk20a_buddy *left; /* Lower address sub-node. */ | ||
34 | struct gk20a_buddy *right; /* Higher address sub-node. */ | ||
35 | |||
36 | struct list_head buddy_entry; /* List entry for various lists. */ | ||
37 | struct rb_node alloced_entry; /* RB tree of allocations. */ | ||
38 | |||
39 | u64 start; /* Start address of this buddy. */ | ||
40 | u64 end; /* End address of this buddy. */ | ||
41 | u64 order; /* Buddy order. */ | ||
42 | |||
43 | #define BALLOC_BUDDY_ALLOCED 0x1 | ||
44 | #define BALLOC_BUDDY_SPLIT 0x2 | ||
45 | #define BALLOC_BUDDY_IN_LIST 0x4 | ||
46 | int flags; /* List of associated flags. */ | ||
47 | |||
48 | /* | ||
49 | * Size of the PDE this buddy is using. This allows for grouping like | ||
50 | * sized allocations into the same PDE. | ||
51 | */ | ||
52 | #define BALLOC_PTE_SIZE_ANY 0x0 | ||
53 | #define BALLOC_PTE_SIZE_SMALL 0x1 | ||
54 | #define BALLOC_PTE_SIZE_BIG 0x2 | ||
55 | int pte_size; | ||
56 | }; | ||
57 | |||
58 | #define __buddy_flag_ops(flag, flag_up) \ | ||
59 | static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ | ||
60 | { \ | ||
61 | return b->flags & BALLOC_BUDDY_ ## flag_up; \ | ||
62 | } \ | ||
63 | static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ | ||
64 | { \ | ||
65 | b->flags |= BALLOC_BUDDY_ ## flag_up; \ | ||
66 | } \ | ||
67 | static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ | ||
68 | { \ | ||
69 | b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * int buddy_is_alloced(struct gk20a_buddy *b); | ||
74 | * void buddy_set_alloced(struct gk20a_buddy *b); | ||
75 | * void buddy_clr_alloced(struct gk20a_buddy *b); | ||
76 | * | ||
77 | * int buddy_is_split(struct gk20a_buddy *b); | ||
78 | * void buddy_set_split(struct gk20a_buddy *b); | ||
79 | * void buddy_clr_split(struct gk20a_buddy *b); | ||
80 | * | ||
81 | * int buddy_is_in_list(struct gk20a_buddy *b); | ||
82 | * void buddy_set_in_list(struct gk20a_buddy *b); | ||
83 | * void buddy_clr_in_list(struct gk20a_buddy *b); | ||
84 | */ | ||
85 | __buddy_flag_ops(alloced, ALLOCED); | ||
86 | __buddy_flag_ops(split, SPLIT); | ||
87 | __buddy_flag_ops(in_list, IN_LIST); | ||
88 | |||
89 | /* | ||
90 | * Keeps info for a fixed allocation. | ||
91 | */ | ||
92 | struct gk20a_fixed_alloc { | ||
93 | struct list_head buddies; /* List of buddies. */ | ||
94 | struct rb_node alloced_entry; /* RB tree of fixed allocations. */ | ||
95 | |||
96 | u64 start; /* Start of fixed block. */ | ||
97 | u64 end; /* End address. */ | ||
98 | }; | ||
99 | |||
100 | struct vm_gk20a; | ||
101 | |||
102 | /* | ||
103 | * GPU buddy allocator for the various GPU address spaces. Each addressable unit | ||
104 | * doesn't have to correspond to a byte. In some cases each unit is a more | ||
105 | * complex object such as a comp_tag line or the like. | ||
106 | * | ||
107 | * The max order is computed based on the size of the minimum order and the size | ||
108 | * of the address space. | ||
109 | * | ||
110 | * order_size is the size of an order 0 buddy. | ||
111 | */ | ||
27 | struct gk20a_allocator { | 112 | struct gk20a_allocator { |
28 | 113 | ||
29 | char name[32]; /* name for allocator */ | 114 | struct vm_gk20a *vm; /* Parent VM - can be NULL. */ |
30 | struct rb_root rb_root; /* rb tree root for blocks */ | ||
31 | 115 | ||
32 | u32 base; /* min value of this linear space */ | 116 | char name[32]; /* Name of allocator. */ |
33 | u32 limit; /* max value = limit - 1 */ | ||
34 | 117 | ||
35 | unsigned long *bitmap; /* bitmap */ | 118 | u64 base; /* Base address of the space. */ |
119 | u64 length; /* Length of the space. */ | ||
120 | u64 blk_size; /* Size of order 0 allocation. */ | ||
121 | u64 blk_shift; /* Shift to divide by blk_size. */ | ||
36 | 122 | ||
37 | struct gk20a_alloc_block *block_first; /* first block in list */ | 123 | int init; /* Non-zero if initialized. */ |
38 | struct gk20a_alloc_block *block_recent; /* last visited block */ | ||
39 | 124 | ||
40 | u32 first_free_addr; /* first free addr, non-contigous | 125 | /* Internal stuff. */ |
41 | allocation preferred start, | 126 | u64 start; /* Real start (aligned to blk_size). */ |
42 | in order to pick up small holes */ | 127 | u64 end; /* Real end, trimmed if needed. */ |
43 | u32 last_free_addr; /* last free addr, contiguous | 128 | u64 count; /* Count of objects in space. */ |
44 | allocation preferred start */ | 129 | u64 blks; /* Count of blks in the space. */ |
45 | u32 cached_hole_size; /* max free hole size up to | 130 | u64 max_order; /* Specific maximum order. */ |
46 | last_free_addr */ | ||
47 | u32 block_count; /* number of blocks */ | ||
48 | 131 | ||
49 | struct rw_semaphore rw_sema; /* lock */ | 132 | struct rb_root alloced_buddies; /* Outstanding allocations. */ |
50 | struct kmem_cache *block_cache; /* slab cache */ | 133 | struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ |
51 | 134 | ||
52 | /* if enabled, constrain to [base, limit) */ | 135 | struct mutex lock; /* Protects buddy access. */ |
53 | struct { | ||
54 | bool enable; | ||
55 | u32 base; | ||
56 | u32 limit; | ||
57 | } constraint; | ||
58 | 136 | ||
59 | int (*alloc)(struct gk20a_allocator *allocator, | 137 | #define GPU_BALLOC_GVA_SPACE 0x1 |
60 | u32 *addr, u32 len, u32 align); | 138 | u64 flags; |
61 | int (*free)(struct gk20a_allocator *allocator, | ||
62 | u32 addr, u32 len, u32 align); | ||
63 | 139 | ||
64 | }; | 140 | /* |
141 | * Impose an upper bound on the maximum order. | ||
142 | */ | ||
143 | #define GPU_BALLOC_MAX_ORDER 31 | ||
144 | #define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) | ||
65 | 145 | ||
66 | int gk20a_allocator_init(struct gk20a_allocator *allocator, | 146 | struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; |
67 | const char *name, u32 base, u32 size); | 147 | u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; |
68 | void gk20a_allocator_destroy(struct gk20a_allocator *allocator); | 148 | u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; |
149 | u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; | ||
69 | 150 | ||
70 | int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, | 151 | /* |
71 | u32 *addr, u32 len, u32 align); | 152 | * This is for when the allocator is managing a GVA space (the |
153 | * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires | ||
154 | * that we group like sized allocations into PDE blocks. | ||
155 | */ | ||
156 | u64 pte_blk_order; | ||
72 | 157 | ||
73 | int gk20a_allocator_block_free(struct gk20a_allocator *allocator, | 158 | struct dentry *debugfs_entry; |
74 | u32 addr, u32 len, u32 align); | ||
75 | 159 | ||
76 | #if defined(ALLOCATOR_DEBUG) | 160 | u64 bytes_alloced; |
161 | u64 bytes_alloced_real; | ||
162 | u64 bytes_freed; | ||
163 | }; | ||
77 | 164 | ||
78 | #define allocator_dbg(alloctor, format, arg...) \ | 165 | #define balloc_lock(a) mutex_lock(&(a)->lock) |
79 | do { \ | 166 | #define balloc_unlock(a) mutex_unlock(&(a)->lock) |
80 | if (1) \ | ||
81 | pr_debug("gk20a_allocator (%s) %s: " format "\n",\ | ||
82 | alloctor->name, __func__, ##arg);\ | ||
83 | } while (0) | ||
84 | 167 | ||
85 | #else /* ALLOCATOR_DEBUG */ | 168 | #define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)]) |
169 | #define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size) | ||
170 | #define balloc_base_shift(a, base) ((base) - (a)->start) | ||
171 | #define balloc_base_unshift(a, base) ((base) + (a)->start) | ||
86 | 172 | ||
87 | #define allocator_dbg(format, arg...) | 173 | int gk20a_allocator_init(struct gk20a_allocator *allocator, |
174 | const char *name, u64 base, u64 size, u64 order0); | ||
175 | int __gk20a_allocator_init(struct gk20a_allocator *allocator, | ||
176 | struct vm_gk20a *vm, const char *name, | ||
177 | u64 base, u64 size, u64 order0, | ||
178 | u64 max_order, u64 flags); | ||
179 | void gk20a_allocator_destroy(struct gk20a_allocator *allocator); | ||
88 | 180 | ||
89 | #endif /* ALLOCATOR_DEBUG */ | 181 | /* |
182 | * Normal alloc/free operations for the buddy allocator. | ||
183 | */ | ||
184 | u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len); | ||
185 | void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr); | ||
186 | |||
187 | /* | ||
188 | * Special interface to allocate a memory regions with a specific starting | ||
189 | * address. Yikes. | ||
190 | */ | ||
191 | u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); | ||
192 | |||
193 | /* | ||
194 | * Debugfs init. | ||
195 | */ | ||
196 | void gk20a_alloc_debugfs_init(struct platform_device *pdev); | ||
197 | |||
198 | #if defined(ALLOCATOR_DEBUG) | ||
199 | #define balloc_dbg(alloctor, format, arg...) \ | ||
200 | pr_info("%-25s %25s() " format, \ | ||
201 | alloctor->name, __func__, ##arg) | ||
202 | #else | ||
203 | #define balloc_dbg(allocator, format, arg...) | ||
204 | #endif | ||
90 | 205 | ||
91 | #endif /* GK20A_ALLOCATOR_H */ | 206 | #endif /* GK20A_ALLOCATOR_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 02bea0a1..7cb386f0 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -89,9 +89,8 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
89 | if (err) | 89 | if (err) |
90 | return err; | 90 | return err; |
91 | 91 | ||
92 | gk20a_allocator_init(&gr->comp_tags, "comptag", | 92 | __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", |
93 | 1, /* start */ | 93 | 1, max_comptag_lines - 1, 1, 10, 0); |
94 | max_comptag_lines - 1); /* length*/ | ||
95 | 94 | ||
96 | gr->comptags_per_cacheline = comptags_per_cacheline; | 95 | gr->comptags_per_cacheline = comptags_per_cacheline; |
97 | gr->slices_per_ltc = slices_per_fbp / g->ltc_count; | 96 | gr->slices_per_ltc = slices_per_fbp / g->ltc_count; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 735c262a..a38db709 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
132 | 132 | ||
133 | if (priv->comptags.lines) { | 133 | if (priv->comptags.lines) { |
134 | BUG_ON(!priv->comptag_allocator); | 134 | BUG_ON(!priv->comptag_allocator); |
135 | priv->comptag_allocator->free(priv->comptag_allocator, | 135 | gk20a_bfree(priv->comptag_allocator, |
136 | priv->comptags.offset, | 136 | priv->comptags.real_offset); |
137 | priv->comptags.allocated_lines, | ||
138 | 1); | ||
139 | } | 137 | } |
140 | 138 | ||
141 | /* Free buffer states */ | 139 | /* Free buffer states */ |
@@ -226,10 +224,9 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
226 | u32 *ctag_map_win_ctagline) | 224 | u32 *ctag_map_win_ctagline) |
227 | { | 225 | { |
228 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
229 | u32 offset = 0; | ||
230 | int err; | ||
231 | u32 ctaglines_to_allocate; | 227 | u32 ctaglines_to_allocate; |
232 | u32 ctagline_align; | 228 | u32 ctagline_align = 1; |
229 | u32 offset; | ||
233 | const u32 aggregate_cacheline_sz = | 230 | const u32 aggregate_cacheline_sz = |
234 | g->gr.cacheline_size * g->gr.slices_per_ltc * | 231 | g->gr.cacheline_size * g->gr.slices_per_ltc * |
235 | g->ltc_count; | 232 | g->ltc_count; |
@@ -243,7 +240,6 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
243 | 240 | ||
244 | if (!user_mappable) { | 241 | if (!user_mappable) { |
245 | ctaglines_to_allocate = lines; | 242 | ctaglines_to_allocate = lines; |
246 | ctagline_align = 1; | ||
247 | } else { | 243 | } else { |
248 | /* Unfortunately, we cannot use allocation alignment | 244 | /* Unfortunately, we cannot use allocation alignment |
249 | * here, since compbits per cacheline is not always a | 245 | * here, since compbits per cacheline is not always a |
@@ -275,82 +271,26 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
275 | 271 | ||
276 | if (ctaglines_to_allocate < lines) | 272 | if (ctaglines_to_allocate < lines) |
277 | return -EINVAL; /* integer overflow */ | 273 | return -EINVAL; /* integer overflow */ |
274 | pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate); | ||
278 | } | 275 | } |
279 | 276 | ||
280 | /* store the allocator so we can use it when we free the ctags */ | 277 | /* store the allocator so we can use it when we free the ctags */ |
281 | priv->comptag_allocator = allocator; | 278 | priv->comptag_allocator = allocator; |
282 | err = allocator->alloc(allocator, &offset, | 279 | offset = gk20a_balloc(allocator, ctaglines_to_allocate); |
283 | ctaglines_to_allocate, 1); | 280 | if (!offset) |
284 | if (!err) { | 281 | return -ENOMEM; |
285 | const u32 alignment_lines = | ||
286 | DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - | ||
287 | offset; | ||
288 | |||
289 | /* prune the preceding ctaglines that were allocated | ||
290 | for alignment */ | ||
291 | if (alignment_lines) { | ||
292 | /* free alignment lines */ | ||
293 | int tmp= | ||
294 | allocator->free(allocator, offset, | ||
295 | alignment_lines, | ||
296 | 1); | ||
297 | WARN_ON(tmp); | ||
298 | |||
299 | offset += alignment_lines; | ||
300 | ctaglines_to_allocate -= alignment_lines; | ||
301 | } | ||
302 | 282 | ||
303 | /* check if we can prune the trailing, too */ | 283 | priv->comptags.lines = lines; |
304 | if (user_mappable) | 284 | priv->comptags.real_offset = offset; |
305 | { | ||
306 | u32 needed_cachelines = | ||
307 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline); | ||
308 | |||
309 | u32 first_unneeded_cacheline = | ||
310 | DIV_ROUND_UP(round_up(needed_cachelines * | ||
311 | aggregate_cacheline_sz, | ||
312 | small_pgsz), | ||
313 | aggregate_cacheline_sz); | ||
314 | u32 needed_ctaglines = | ||
315 | first_unneeded_cacheline * | ||
316 | g->gr.comptags_per_cacheline; | ||
317 | |||
318 | u64 win_size; | ||
319 | |||
320 | if (needed_ctaglines < ctaglines_to_allocate) { | ||
321 | /* free alignment lines */ | ||
322 | int tmp= | ||
323 | allocator->free( | ||
324 | allocator, | ||
325 | offset + needed_ctaglines, | ||
326 | (ctaglines_to_allocate - | ||
327 | needed_ctaglines), | ||
328 | 1); | ||
329 | WARN_ON(tmp); | ||
330 | |||
331 | ctaglines_to_allocate = needed_ctaglines; | ||
332 | } | ||
333 | 285 | ||
334 | *ctag_map_win_ctagline = offset; | 286 | if (user_mappable) |
335 | win_size = | 287 | offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align; |
336 | DIV_ROUND_UP(lines, | ||
337 | g->gr.comptags_per_cacheline) * | ||
338 | aggregate_cacheline_sz; | ||
339 | 288 | ||
340 | *ctag_map_win_size = round_up(win_size, small_pgsz); | 289 | priv->comptags.offset = offset; |
341 | } | ||
342 | 290 | ||
343 | priv->comptags.offset = offset; | 291 | return 0; |
344 | priv->comptags.lines = lines; | ||
345 | priv->comptags.allocated_lines = ctaglines_to_allocate; | ||
346 | priv->comptags.user_mappable = user_mappable; | ||
347 | } | ||
348 | return err; | ||
349 | } | 292 | } |
350 | 293 | ||
351 | |||
352 | |||
353 | |||
354 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | 294 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) |
355 | { | 295 | { |
356 | gk20a_dbg_fn(""); | 296 | gk20a_dbg_fn(""); |
@@ -901,14 +841,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) | |||
901 | } | 841 | } |
902 | 842 | ||
903 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | 843 | u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, |
904 | u64 size, | 844 | u64 size, |
905 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 845 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
906 | 846 | ||
907 | { | 847 | { |
908 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; | 848 | struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; |
909 | int err; | ||
910 | u64 offset; | 849 | u64 offset; |
911 | u32 start_page_nr = 0, num_pages; | ||
912 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | 850 | u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; |
913 | 851 | ||
914 | if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { | 852 | if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { |
@@ -924,28 +862,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, | |||
924 | 862 | ||
925 | } | 863 | } |
926 | 864 | ||
927 | /* be certain we round up to gmmu_page_size if needed */ | 865 | /* Be certain we round up to gmmu_page_size if needed */ |
928 | /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ | ||
929 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); | 866 | size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); |
930 | |||
931 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, | 867 | gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, |
932 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); | 868 | vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); |
933 | 869 | ||
934 | /* The vma allocator represents page accounting. */ | 870 | offset = gk20a_balloc(vma, size); |
935 | num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); | 871 | if (!offset) { |
936 | |||
937 | err = vma->alloc(vma, &start_page_nr, num_pages, 1); | ||
938 | |||
939 | if (err) { | ||
940 | gk20a_err(dev_from_vm(vm), | 872 | gk20a_err(dev_from_vm(vm), |
941 | "%s oom: sz=0x%llx", vma->name, size); | 873 | "%s oom: sz=0x%llx", vma->name, size); |
942 | return 0; | 874 | return 0; |
943 | } | 875 | } |
944 | 876 | ||
945 | offset = (u64)start_page_nr << | ||
946 | ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); | ||
947 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); | 877 | gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); |
948 | |||
949 | return offset; | 878 | return offset; |
950 | } | 879 | } |
951 | 880 | ||
@@ -954,25 +883,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, | |||
954 | enum gmmu_pgsz_gk20a pgsz_idx) | 883 | enum gmmu_pgsz_gk20a pgsz_idx) |
955 | { | 884 | { |
956 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; | 885 | struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; |
957 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
958 | u32 page_shift = ilog2(page_size); | ||
959 | u32 start_page_nr, num_pages; | ||
960 | int err; | ||
961 | 886 | ||
962 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", | 887 | gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", |
963 | vma->name, offset, size); | 888 | vma->name, offset, size); |
889 | gk20a_bfree(vma, offset); | ||
964 | 890 | ||
965 | start_page_nr = (u32)(offset >> page_shift); | 891 | return 0; |
966 | num_pages = (u32)((size + page_size - 1) >> page_shift); | ||
967 | |||
968 | err = vma->free(vma, start_page_nr, num_pages, 1); | ||
969 | if (err) { | ||
970 | gk20a_err(dev_from_vm(vm), | ||
971 | "not found: offset=0x%llx, sz=0x%llx", | ||
972 | offset, size); | ||
973 | } | ||
974 | |||
975 | return err; | ||
976 | } | 892 | } |
977 | 893 | ||
978 | static int insert_mapped_buffer(struct rb_root *root, | 894 | static int insert_mapped_buffer(struct rb_root *root, |
@@ -1169,7 +1085,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, | |||
1169 | 1085 | ||
1170 | if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { | 1086 | if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { |
1171 | gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", | 1087 | gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", |
1172 | map_offset); | 1088 | map_offset); |
1173 | return -EINVAL; | 1089 | return -EINVAL; |
1174 | } | 1090 | } |
1175 | 1091 | ||
@@ -2613,7 +2529,6 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2613 | char *name) | 2529 | char *name) |
2614 | { | 2530 | { |
2615 | int err, i; | 2531 | int err, i; |
2616 | u32 num_small_pages, num_large_pages, low_hole_pages; | ||
2617 | char alloc_name[32]; | 2532 | char alloc_name[32]; |
2618 | u64 small_vma_size, large_vma_size; | 2533 | u64 small_vma_size, large_vma_size; |
2619 | u32 pde_lo, pde_hi; | 2534 | u32 pde_lo, pde_hi; |
@@ -2674,34 +2589,31 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2674 | large_vma_size = vm->va_limit - small_vma_size; | 2589 | large_vma_size = vm->va_limit - small_vma_size; |
2675 | } | 2590 | } |
2676 | 2591 | ||
2677 | num_small_pages = (u32)(small_vma_size >> | ||
2678 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
2679 | |||
2680 | /* num_pages above is without regard to the low-side hole. */ | ||
2681 | low_hole_pages = (vm->va_start >> | ||
2682 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
2683 | |||
2684 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2592 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, |
2685 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); | 2593 | vm->gmmu_page_sizes[gmmu_page_size_small]>>10); |
2686 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 2594 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], |
2687 | alloc_name, | 2595 | vm, alloc_name, |
2688 | low_hole_pages, /*start*/ | 2596 | vm->va_start, |
2689 | num_small_pages - low_hole_pages);/* length*/ | 2597 | small_vma_size - vm->va_start, |
2598 | SZ_4K, | ||
2599 | GPU_BALLOC_MAX_ORDER, | ||
2600 | GPU_BALLOC_GVA_SPACE); | ||
2690 | if (err) | 2601 | if (err) |
2691 | goto clean_up_ptes; | 2602 | goto clean_up_ptes; |
2692 | 2603 | ||
2693 | if (big_pages) { | 2604 | if (big_pages) { |
2694 | u32 start = (u32)(small_vma_size >> | ||
2695 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2696 | num_large_pages = (u32)(large_vma_size >> | ||
2697 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
2698 | |||
2699 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", | 2605 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", |
2700 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); | 2606 | name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); |
2701 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 2607 | /* |
2702 | alloc_name, | 2608 | * Big page VMA starts at the end of the small page VMA. |
2703 | start, /* start */ | 2609 | */ |
2704 | num_large_pages); /* length */ | 2610 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
2611 | vm, alloc_name, | ||
2612 | small_vma_size, | ||
2613 | large_vma_size, | ||
2614 | big_page_size, | ||
2615 | GPU_BALLOC_MAX_ORDER, | ||
2616 | GPU_BALLOC_GVA_SPACE); | ||
2705 | if (err) | 2617 | if (err) |
2706 | goto clean_up_small_allocator; | 2618 | goto clean_up_small_allocator; |
2707 | } | 2619 | } |
@@ -2782,9 +2694,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share) | |||
2782 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | 2694 | int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, |
2783 | struct nvgpu_as_alloc_space_args *args) | 2695 | struct nvgpu_as_alloc_space_args *args) |
2784 | 2696 | ||
2785 | { int err = -ENOMEM; | 2697 | { |
2698 | int err = -ENOMEM; | ||
2786 | int pgsz_idx = gmmu_page_size_small; | 2699 | int pgsz_idx = gmmu_page_size_small; |
2787 | u32 start_page_nr; | ||
2788 | struct gk20a_allocator *vma; | 2700 | struct gk20a_allocator *vma; |
2789 | struct vm_gk20a *vm = as_share->vm; | 2701 | struct vm_gk20a *vm = as_share->vm; |
2790 | struct gk20a *g = vm->mm->g; | 2702 | struct gk20a *g = vm->mm->g; |
@@ -2815,21 +2727,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2815 | goto clean_up; | 2727 | goto clean_up; |
2816 | } | 2728 | } |
2817 | 2729 | ||
2818 | start_page_nr = 0; | 2730 | vma = &vm->vma[pgsz_idx]; |
2819 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | 2731 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) |
2820 | start_page_nr = (u32)(args->o_a.offset >> | 2732 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, |
2821 | ilog2(vm->gmmu_page_sizes[pgsz_idx])); | 2733 | (u64)args->pages * |
2734 | (u64)args->page_size); | ||
2735 | else | ||
2736 | vaddr_start = gk20a_balloc(vma, args->pages * args->page_size); | ||
2822 | 2737 | ||
2823 | vma = &vm->vma[pgsz_idx]; | 2738 | if (!vaddr_start) { |
2824 | err = vma->alloc(vma, &start_page_nr, args->pages, 1); | ||
2825 | if (err) { | ||
2826 | kfree(va_node); | 2739 | kfree(va_node); |
2827 | goto clean_up; | 2740 | goto clean_up; |
2828 | } | 2741 | } |
2829 | 2742 | ||
2830 | vaddr_start = (u64)start_page_nr << | ||
2831 | ilog2(vm->gmmu_page_sizes[pgsz_idx]); | ||
2832 | |||
2833 | va_node->vaddr_start = vaddr_start; | 2743 | va_node->vaddr_start = vaddr_start; |
2834 | va_node->size = (u64)args->page_size * (u64)args->pages; | 2744 | va_node->size = (u64)args->page_size * (u64)args->pages; |
2835 | va_node->pgsz_idx = pgsz_idx; | 2745 | va_node->pgsz_idx = pgsz_idx; |
@@ -2853,7 +2763,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2853 | true); | 2763 | true); |
2854 | if (!map_offset) { | 2764 | if (!map_offset) { |
2855 | mutex_unlock(&vm->update_gmmu_lock); | 2765 | mutex_unlock(&vm->update_gmmu_lock); |
2856 | vma->free(vma, start_page_nr, args->pages, 1); | 2766 | gk20a_bfree(vma, vaddr_start); |
2857 | kfree(va_node); | 2767 | kfree(va_node); |
2858 | goto clean_up; | 2768 | goto clean_up; |
2859 | } | 2769 | } |
@@ -2865,6 +2775,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2865 | mutex_unlock(&vm->update_gmmu_lock); | 2775 | mutex_unlock(&vm->update_gmmu_lock); |
2866 | 2776 | ||
2867 | args->o_a.offset = vaddr_start; | 2777 | args->o_a.offset = vaddr_start; |
2778 | err = 0; | ||
2868 | 2779 | ||
2869 | clean_up: | 2780 | clean_up: |
2870 | return err; | 2781 | return err; |
@@ -2875,7 +2786,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2875 | { | 2786 | { |
2876 | int err = -ENOMEM; | 2787 | int err = -ENOMEM; |
2877 | int pgsz_idx; | 2788 | int pgsz_idx; |
2878 | u32 start_page_nr; | ||
2879 | struct gk20a_allocator *vma; | 2789 | struct gk20a_allocator *vma; |
2880 | struct vm_gk20a *vm = as_share->vm; | 2790 | struct vm_gk20a *vm = as_share->vm; |
2881 | struct vm_reserved_va_node *va_node; | 2791 | struct vm_reserved_va_node *va_node; |
@@ -2888,14 +2798,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2888 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? | 2798 | pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? |
2889 | gmmu_page_size_big : gmmu_page_size_small; | 2799 | gmmu_page_size_big : gmmu_page_size_small; |
2890 | 2800 | ||
2891 | start_page_nr = (u32)(args->offset >> | ||
2892 | ilog2(vm->gmmu_page_sizes[pgsz_idx])); | ||
2893 | |||
2894 | vma = &vm->vma[pgsz_idx]; | 2801 | vma = &vm->vma[pgsz_idx]; |
2895 | err = vma->free(vma, start_page_nr, args->pages, 1); | 2802 | gk20a_bfree(vma, args->offset); |
2896 | |||
2897 | if (err) | ||
2898 | goto clean_up; | ||
2899 | 2803 | ||
2900 | mutex_lock(&vm->update_gmmu_lock); | 2804 | mutex_lock(&vm->update_gmmu_lock); |
2901 | va_node = addr_to_reservation(vm, args->offset); | 2805 | va_node = addr_to_reservation(vm, args->offset); |
@@ -2925,8 +2829,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2925 | kfree(va_node); | 2829 | kfree(va_node); |
2926 | } | 2830 | } |
2927 | mutex_unlock(&vm->update_gmmu_lock); | 2831 | mutex_unlock(&vm->update_gmmu_lock); |
2832 | err = 0; | ||
2928 | 2833 | ||
2929 | clean_up: | ||
2930 | return err; | 2834 | return err; |
2931 | } | 2835 | } |
2932 | 2836 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c1f8a4f0..82003cd0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -131,6 +131,7 @@ enum gmmu_pgsz_gk20a { | |||
131 | }; | 131 | }; |
132 | 132 | ||
133 | struct gk20a_comptags { | 133 | struct gk20a_comptags { |
134 | u32 real_offset; | ||
134 | u32 offset; | 135 | u32 offset; |
135 | u32 lines; | 136 | u32 lines; |
136 | u32 allocated_lines; | 137 | u32 allocated_lines; |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 2456c784..11322293 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -2816,7 +2816,6 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) | |||
2816 | struct pmu_payload payload; | 2816 | struct pmu_payload payload; |
2817 | u32 seq; | 2817 | u32 seq; |
2818 | u32 data; | 2818 | u32 data; |
2819 | int err = 0; | ||
2820 | 2819 | ||
2821 | gk20a_dbg_fn(""); | 2820 | gk20a_dbg_fn(""); |
2822 | 2821 | ||
@@ -2867,12 +2866,11 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) | |||
2867 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); | 2866 | gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); |
2868 | 2867 | ||
2869 | if (!pmu->sample_buffer) | 2868 | if (!pmu->sample_buffer) |
2870 | err = pmu->dmem.alloc(&pmu->dmem, | 2869 | pmu->sample_buffer = gk20a_balloc(&pmu->dmem, |
2871 | &pmu->sample_buffer, 2 * sizeof(u16), | 2870 | 2 * sizeof(u16)); |
2872 | PMU_DMEM_ALLOC_ALIGNMENT); | 2871 | if (!pmu->sample_buffer) { |
2873 | if (err) { | ||
2874 | gk20a_err(dev_from_gk20a(g), | 2872 | gk20a_err(dev_from_gk20a(g), |
2875 | "failed to allocate perfmon sample buffer"); | 2873 | "failed to allocate perfmon sample buffer"); |
2876 | return -ENOMEM; | 2874 | return -ENOMEM; |
2877 | } | 2875 | } |
2878 | 2876 | ||
@@ -2970,15 +2968,17 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, | |||
2970 | for (i = 0; i < PMU_QUEUE_COUNT; i++) | 2968 | for (i = 0; i < PMU_QUEUE_COUNT; i++) |
2971 | pmu_queue_init(pmu, i, init); | 2969 | pmu_queue_init(pmu, i, init); |
2972 | 2970 | ||
2973 | if (!pmu->dmem.alloc) { | 2971 | if (!pmu->dmem.init) { |
2974 | /*Align start and end addresses*/ | 2972 | /* Align start and end addresses */ |
2975 | u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), | 2973 | u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), |
2976 | PMU_DMEM_ALLOC_ALIGNMENT); | 2974 | PMU_DMEM_ALLOC_ALIGNMENT); |
2977 | u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) + | 2975 | u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) + |
2978 | pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & | 2976 | pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & |
2979 | ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); | 2977 | ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); |
2980 | u32 size = end - start; | 2978 | u32 size = end - start; |
2981 | gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", start, size); | 2979 | __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem", |
2980 | start, size, | ||
2981 | PMU_DMEM_ALLOC_ALIGNMENT, 4, 0); | ||
2982 | } | 2982 | } |
2983 | 2983 | ||
2984 | pmu->pmu_ready = true; | 2984 | pmu->pmu_ready = true; |
@@ -3115,20 +3115,14 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, | |||
3115 | seq->callback = NULL; | 3115 | seq->callback = NULL; |
3116 | if (pv->pmu_allocation_get_dmem_size(pmu, | 3116 | if (pv->pmu_allocation_get_dmem_size(pmu, |
3117 | pv->get_pmu_seq_in_a_ptr(seq)) != 0) | 3117 | pv->get_pmu_seq_in_a_ptr(seq)) != 0) |
3118 | pmu->dmem.free(&pmu->dmem, | 3118 | gk20a_bfree(&pmu->dmem, |
3119 | pv->pmu_allocation_get_dmem_offset(pmu, | 3119 | pv->pmu_allocation_get_dmem_offset(pmu, |
3120 | pv->get_pmu_seq_in_a_ptr(seq)), | 3120 | pv->get_pmu_seq_in_a_ptr(seq))); |
3121 | pv->pmu_allocation_get_dmem_size(pmu, | ||
3122 | pv->get_pmu_seq_in_a_ptr(seq)), | ||
3123 | PMU_DMEM_ALLOC_ALIGNMENT); | ||
3124 | if (pv->pmu_allocation_get_dmem_size(pmu, | 3121 | if (pv->pmu_allocation_get_dmem_size(pmu, |
3125 | pv->get_pmu_seq_out_a_ptr(seq)) != 0) | 3122 | pv->get_pmu_seq_out_a_ptr(seq)) != 0) |
3126 | pmu->dmem.free(&pmu->dmem, | 3123 | gk20a_bfree(&pmu->dmem, |
3127 | pv->pmu_allocation_get_dmem_offset(pmu, | 3124 | pv->pmu_allocation_get_dmem_offset(pmu, |
3128 | pv->get_pmu_seq_out_a_ptr(seq)), | 3125 | pv->get_pmu_seq_out_a_ptr(seq))); |
3129 | pv->pmu_allocation_get_dmem_size(pmu, | ||
3130 | pv->get_pmu_seq_out_a_ptr(seq)), | ||
3131 | PMU_DMEM_ALLOC_ALIGNMENT); | ||
3132 | 3126 | ||
3133 | if (seq->callback) | 3127 | if (seq->callback) |
3134 | seq->callback(g, msg, seq->cb_params, seq->desc, ret); | 3128 | seq->callback(g, msg, seq->cb_params, seq->desc, ret); |
@@ -3769,11 +3763,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
3769 | pv->pmu_allocation_set_dmem_size(pmu, in, | 3763 | pv->pmu_allocation_set_dmem_size(pmu, in, |
3770 | (u16)max(payload->in.size, payload->out.size)); | 3764 | (u16)max(payload->in.size, payload->out.size)); |
3771 | 3765 | ||
3772 | err = pmu->dmem.alloc(&pmu->dmem, | 3766 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = |
3773 | pv->pmu_allocation_get_dmem_offset_addr(pmu, in), | 3767 | gk20a_balloc(&pmu->dmem, |
3774 | pv->pmu_allocation_get_dmem_size(pmu, in), | 3768 | pv->pmu_allocation_get_dmem_size(pmu, in)); |
3775 | PMU_DMEM_ALLOC_ALIGNMENT); | 3769 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) |
3776 | if (err) | ||
3777 | goto clean_up; | 3770 | goto clean_up; |
3778 | 3771 | ||
3779 | pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, | 3772 | pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, |
@@ -3794,11 +3787,12 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
3794 | (u16)payload->out.size); | 3787 | (u16)payload->out.size); |
3795 | 3788 | ||
3796 | if (payload->out.buf != payload->in.buf) { | 3789 | if (payload->out.buf != payload->in.buf) { |
3797 | err = pmu->dmem.alloc(&pmu->dmem, | 3790 | |
3798 | pv->pmu_allocation_get_dmem_offset_addr(pmu, out), | 3791 | *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = |
3799 | pv->pmu_allocation_get_dmem_size(pmu, out), | 3792 | gk20a_balloc(&pmu->dmem, |
3800 | PMU_DMEM_ALLOC_ALIGNMENT); | 3793 | pv->pmu_allocation_get_dmem_size(pmu, out)); |
3801 | if (err) | 3794 | if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, |
3795 | out))) | ||
3802 | goto clean_up; | 3796 | goto clean_up; |
3803 | } else { | 3797 | } else { |
3804 | BUG_ON(in == NULL); | 3798 | BUG_ON(in == NULL); |
@@ -3826,15 +3820,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, | |||
3826 | clean_up: | 3820 | clean_up: |
3827 | gk20a_dbg_fn("fail"); | 3821 | gk20a_dbg_fn("fail"); |
3828 | if (in) | 3822 | if (in) |
3829 | pmu->dmem.free(&pmu->dmem, | 3823 | gk20a_bfree(&pmu->dmem, |
3830 | pv->pmu_allocation_get_dmem_offset(pmu, in), | 3824 | pv->pmu_allocation_get_dmem_offset(pmu, in)); |
3831 | pv->pmu_allocation_get_dmem_size(pmu, in), | ||
3832 | PMU_DMEM_ALLOC_ALIGNMENT); | ||
3833 | if (out) | 3825 | if (out) |
3834 | pmu->dmem.free(&pmu->dmem, | 3826 | gk20a_bfree(&pmu->dmem, |
3835 | pv->pmu_allocation_get_dmem_offset(pmu, out), | 3827 | pv->pmu_allocation_get_dmem_offset(pmu, out)); |
3836 | pv->pmu_allocation_get_dmem_size(pmu, out), | ||
3837 | PMU_DMEM_ALLOC_ALIGNMENT); | ||
3838 | 3828 | ||
3839 | pmu_seq_release(pmu, seq); | 3829 | pmu_seq_release(pmu, seq); |
3840 | return err; | 3830 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 73530b22..f29c810e 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A PMU (aka. gPMU outside gk20a context) | 4 | * GK20A PMU (aka. gPMU outside gk20a context) |
5 | * | 5 | * |
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -466,7 +466,7 @@ struct pmu_ucode_desc { | |||
466 | #define PMU_UNIT_ID_IS_VALID(id) \ | 466 | #define PMU_UNIT_ID_IS_VALID(id) \ |
467 | (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) | 467 | (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) |
468 | 468 | ||
469 | #define PMU_DMEM_ALLOC_ALIGNMENT (32) | 469 | #define PMU_DMEM_ALLOC_ALIGNMENT (4) |
470 | #define PMU_DMEM_ALIGNMENT (4) | 470 | #define PMU_DMEM_ALIGNMENT (4) |
471 | 471 | ||
472 | #define PMU_CMD_FLAGS_PMU_MASK (0xF0) | 472 | #define PMU_CMD_FLAGS_PMU_MASK (0xF0) |
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c index 04f61c58..053550f6 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Semaphores | 4 | * GK20A Semaphores |
5 | * | 5 | * |
6 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -44,8 +44,10 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d, | |||
44 | if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size)) | 44 | if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size)) |
45 | goto clean_up; | 45 | goto clean_up; |
46 | 46 | ||
47 | if (gk20a_allocator_init(&p->alloc, unique_name, 0, | 47 | /* Sacrifice one semaphore in the name of returning error codes. */ |
48 | p->size)) | 48 | if (gk20a_allocator_init(&p->alloc, unique_name, |
49 | SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE, | ||
50 | SEMAPHORE_SIZE)) | ||
49 | goto clean_up; | 51 | goto clean_up; |
50 | 52 | ||
51 | gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, | 53 | gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, |
@@ -163,8 +165,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) | |||
163 | if (!s) | 165 | if (!s) |
164 | return NULL; | 166 | return NULL; |
165 | 167 | ||
166 | if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE, | 168 | s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE); |
167 | SEMAPHORE_SIZE)) { | 169 | if (!s->offset) { |
168 | gk20a_err(pool->dev, "failed to allocate semaphore"); | 170 | gk20a_err(pool->dev, "failed to allocate semaphore"); |
169 | kfree(s); | 171 | kfree(s); |
170 | return NULL; | 172 | return NULL; |
@@ -186,8 +188,7 @@ static void gk20a_semaphore_free(struct kref *ref) | |||
186 | struct gk20a_semaphore *s = | 188 | struct gk20a_semaphore *s = |
187 | container_of(ref, struct gk20a_semaphore, ref); | 189 | container_of(ref, struct gk20a_semaphore, ref); |
188 | 190 | ||
189 | s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE, | 191 | gk20a_bfree(&s->pool->alloc, s->offset); |
190 | SEMAPHORE_SIZE); | ||
191 | gk20a_semaphore_pool_put(s->pool); | 192 | gk20a_semaphore_pool_put(s->pool); |
192 | kfree(s); | 193 | kfree(s); |
193 | } | 194 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 9d16dba7..bc904ef3 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -90,9 +90,8 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
90 | if (err) | 90 | if (err) |
91 | return err; | 91 | return err; |
92 | 92 | ||
93 | gk20a_allocator_init(&gr->comp_tags, "comptag", | 93 | __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", |
94 | 1, /* start */ | 94 | 1, max_comptag_lines - 1, 1, 10, 0); |
95 | max_comptag_lines - 1); /* length*/ | ||
96 | 95 | ||
97 | gr->comptags_per_cacheline = comptags_per_cacheline; | 96 | gr->comptags_per_cacheline = comptags_per_cacheline; |
98 | gr->slices_per_ltc = slices_per_ltc; | 97 | gr->slices_per_ltc = slices_per_ltc; |
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index 1beac216..211e34b5 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | |||
@@ -41,9 +41,8 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
41 | if (max_comptag_lines < 2) | 41 | if (max_comptag_lines < 2) |
42 | return -ENXIO; | 42 | return -ENXIO; |
43 | 43 | ||
44 | gk20a_allocator_init(&gr->comp_tags, "comptag", | 44 | __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", |
45 | 1, /* start */ | 45 | 1, max_comptag_lines - 1, 1, 10, 0); /* length*/ |
46 | max_comptag_lines - 1); /* length*/ | ||
47 | return 0; | 46 | return 0; |
48 | } | 47 | } |
49 | 48 | ||
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 94e4602f..855aac0d 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -243,11 +243,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
243 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | 243 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; |
244 | struct mm_gk20a *mm = &g->mm; | 244 | struct mm_gk20a *mm = &g->mm; |
245 | struct vm_gk20a *vm; | 245 | struct vm_gk20a *vm; |
246 | u32 num_small_pages, num_large_pages, low_hole_pages; | ||
247 | u64 small_vma_size, large_vma_size; | 246 | u64 small_vma_size, large_vma_size; |
248 | char name[32]; | 247 | char name[32]; |
249 | int err, i; | 248 | int err, i; |
250 | u32 start; | ||
251 | 249 | ||
252 | /* note: keep the page sizes sorted lowest to highest here */ | 250 | /* note: keep the page sizes sorted lowest to highest here */ |
253 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { | 251 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { |
@@ -294,33 +292,27 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
294 | small_vma_size = (u64)16 << 30; | 292 | small_vma_size = (u64)16 << 30; |
295 | large_vma_size = vm->va_limit - small_vma_size; | 293 | large_vma_size = vm->va_limit - small_vma_size; |
296 | 294 | ||
297 | num_small_pages = (u32)(small_vma_size >> | ||
298 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
299 | |||
300 | /* num_pages above is without regard to the low-side hole. */ | ||
301 | low_hole_pages = (vm->va_start >> | ||
302 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); | ||
303 | |||
304 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 295 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, |
305 | gmmu_page_sizes[gmmu_page_size_small]>>10); | 296 | gmmu_page_sizes[gmmu_page_size_small]>>10); |
306 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 297 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], |
307 | name, | 298 | vm, name, |
308 | low_hole_pages, /*start*/ | 299 | vm->va_start, |
309 | num_small_pages - low_hole_pages);/* length*/ | 300 | small_vma_size - vm->va_start, |
301 | SZ_4K, | ||
302 | GPU_BALLOC_MAX_ORDER, | ||
303 | GPU_BALLOC_GVA_SPACE); | ||
310 | if (err) | 304 | if (err) |
311 | goto clean_up_share; | 305 | goto clean_up_share; |
312 | 306 | ||
313 | start = (u32)(small_vma_size >> | ||
314 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
315 | num_large_pages = (u32)(large_vma_size >> | ||
316 | ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); | ||
317 | |||
318 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 307 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, |
319 | gmmu_page_sizes[gmmu_page_size_big]>>10); | 308 | gmmu_page_sizes[gmmu_page_size_big]>>10); |
320 | err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 309 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], |
321 | name, | 310 | vm, name, |
322 | start, /* start */ | 311 | small_vma_size, |
323 | num_large_pages); /* length */ | 312 | large_vma_size, |
313 | big_page_size, | ||
314 | GPU_BALLOC_MAX_ORDER, | ||
315 | GPU_BALLOC_GVA_SPACE); | ||
324 | if (err) | 316 | if (err) |
325 | goto clean_up_small_allocator; | 317 | goto clean_up_small_allocator; |
326 | 318 | ||