Diffstat (limited to 'drivers/gpu/nvgpu')
 drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c | 457
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c             |   7
 drivers/gpu/nvgpu/gk20a/page_allocator_priv.h  |  69
 3 files changed, 486 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
index 2e5d46b9..b8e38620 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/mm.h>
 
 #include "gk20a_allocator.h"
 #include "buddy_allocator_priv.h"
@@ -27,8 +28,59 @@
 
 static struct kmem_cache *page_alloc_cache;
 static struct kmem_cache *page_alloc_chunk_cache;
+static struct kmem_cache *page_alloc_slab_page_cache;
 static DEFINE_MUTEX(meta_data_cache_lock);
 
+/*
+ * Handle the book-keeping for these operations.
+ */
+static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
+					  struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->empty);
+	slab->nr_empty++;
+	page->state = SP_EMPTY;
+}
+static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->partial);
+	slab->nr_partial++;
+	page->state = SP_PARTIAL;
+}
+static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
+					 struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->full);
+	slab->nr_full++;
+	page->state = SP_FULL;
+}
+
+static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_empty--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
+					      struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_partial--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
+					   struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_full--;
+	page->state = SP_NONE;
+}
+
 static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
 {
 	struct gk20a_page_allocator *va = a->priv;
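The helpers above form a small state machine: a slab page lives on exactly one of the empty/partial/full lists, every move passes through SP_NONE, and the BUG_ON()s enforce that ordering. A standalone userspace sketch of the same invariant, with simplified stand-in types (not part of the patch):

	#include <assert.h>
	#include <stdio.h>

	enum sp_state { SP_EMPTY, SP_PARTIAL, SP_FULL, SP_NONE };

	struct slab { int nr_empty, nr_partial, nr_full; };
	struct slab_page { enum sp_state state; };

	/* Mirrors add_slab_page_to_partial(): only SP_NONE pages may be added. */
	static void add_to_partial(struct slab *s, struct slab_page *p)
	{
		assert(p->state == SP_NONE);
		s->nr_partial++;
		p->state = SP_PARTIAL;
	}

	/* Mirrors del_slab_page_from_partial(): deletion resets to SP_NONE. */
	static void del_from_partial(struct slab *s, struct slab_page *p)
	{
		s->nr_partial--;
		p->state = SP_NONE;
	}

	int main(void)
	{
		struct slab s = { 0, 0, 0 };
		struct slab_page p = { SP_NONE };

		add_to_partial(&s, &p);   /* first object allocated in the page */
		del_from_partial(&s, &p); /* page taken off-list before moving */
		printf("nr_partial = %d\n", s.nr_partial); /* prints 0 */
		return 0;
	}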
@@ -80,6 +132,26 @@ static void gk20a_page_release_co(struct gk20a_allocator *a,
 	gk20a_alloc_release_carveout(&va->source_allocator, co);
 }
 
+static void __gk20a_free_pages(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc,
+			       bool free_buddy_alloc)
+{
+	struct page_alloc_chunk *chunk;
+
+	while (!list_empty(&alloc->alloc_chunks)) {
+		chunk = list_first_entry(&alloc->alloc_chunks,
+					 struct page_alloc_chunk,
+					 list_entry);
+		list_del(&chunk->list_entry);
+
+		if (free_buddy_alloc)
+			gk20a_free(&a->source_allocator, chunk->base);
+		kfree(chunk);
+	}
+
+	kfree(alloc);
+}
+
 static int __insert_page_alloc(struct gk20a_page_allocator *a,
 			       struct gk20a_page_alloc *alloc)
 {
@@ -134,13 +206,236 @@ static struct gk20a_page_alloc *__find_page_alloc(
 	return alloc;
 }
 
+static struct page_alloc_slab_page *alloc_slab_page(
+	struct gk20a_page_allocator *a,
+	struct page_alloc_slab *slab)
+{
+	struct page_alloc_slab_page *slab_page;
+
+	slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
+	if (!slab_page) {
+		palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(slab_page, 0, sizeof(*slab_page));
+
+	slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
+	if (!slab_page->page_addr) {
+		kfree(slab_page);
+		palloc_dbg(a, "OOM: vidmem is full!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_LIST_HEAD(&slab_page->list_entry);
+	slab_page->slab_size = slab->slab_size;
+	slab_page->nr_objects = a->page_size / slab->slab_size;
+	slab_page->nr_objects_alloced = 0;
+	slab_page->owner = slab;
+	slab_page->state = SP_NONE;
+
+	a->pages_alloced++;
+
+	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
+		   slab_page->page_addr, slab_page->slab_size);
+
+	return slab_page;
+}
+
+static void free_slab_page(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab_page *slab_page)
+{
+	palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
+
+	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
+	       slab_page->nr_objects_alloced != 0 ||
+	       slab_page->bitmap != 0);
+
+	gk20a_free(&a->source_allocator, slab_page->page_addr);
+	a->pages_freed++;
+
+	kmem_cache_free(page_alloc_slab_page_cache, slab_page);
+}
+
+/*
+ * This expects @alloc to have 1 empty page_alloc_chunk already added to the
+ * alloc_chunks list.
+ */
+static int __do_slab_alloc(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab *slab,
+			   struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = NULL;
+	struct page_alloc_chunk *chunk;
+	unsigned long offs;
+
+	/*
+	 * Check the partial and empty lists to see if we have some space
+	 * readily available. Take the slab_page out of whatever list it
+	 * was in since it may be put back into a different list later.
+	 */
+	if (!list_empty(&slab->partial)) {
+		slab_page = list_first_entry(&slab->partial,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_partial(slab, slab_page);
+	} else if (!list_empty(&slab->empty)) {
+		slab_page = list_first_entry(&slab->empty,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_empty(slab, slab_page);
+	}
+
+	if (!slab_page) {
+		slab_page = alloc_slab_page(a, slab);
+		if (IS_ERR(slab_page))
+			return PTR_ERR(slab_page);
+	}
+
+	/*
+	 * We now have a slab_page. Do the alloc.
+	 */
+	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
+					  slab_page->nr_objects,
+					  0, 1, 0);
+	if (offs >= slab_page->nr_objects) {
+		WARN(1, "Empty/partial slab with no free objects?");
+
+		/* Add the buggy page to the full list... This isn't ideal. */
+		add_slab_page_to_full(slab, slab_page);
+		return -ENOMEM;
+	}
+
+	bitmap_set(&slab_page->bitmap, offs, 1);
+	slab_page->nr_objects_alloced++;
+
+	if (slab_page->nr_objects_alloced < slab_page->nr_objects)
+		add_slab_page_to_partial(slab, slab_page);
+	else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
+		add_slab_page_to_full(slab, slab_page);
+	else
+		BUG(); /* Should be impossible to hit this. */
+
+	/*
+	 * Handle building the gk20a_page_alloc struct. We expect one
+	 * page_alloc_chunk to be present.
+	 */
+	alloc->slab_page = slab_page;
+	alloc->nr_chunks = 1;
+	alloc->length = slab_page->slab_size;
+	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
+
+	chunk = list_first_entry(&alloc->alloc_chunks,
+				 struct page_alloc_chunk, list_entry);
+	chunk->base = alloc->base;
+	chunk->length = alloc->length;
+
+	return 0;
+}
+
+/*
+ * Allocate from a slab instead of directly from the page allocator.
+ */
+static struct gk20a_page_alloc *__gk20a_alloc_slab(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	int err, slab_nr;
+	struct page_alloc_slab *slab;
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+
+	/*
+	 * Align the length to a page and then divide by the page size (4k for
+	 * this code). ilog2() of that then gets us the correct slab to use.
+	 */
+	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
+	slab = &a->slabs[slab_nr];
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	if (!alloc) {
+		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
+		goto fail;
+	}
+	chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+	if (!chunk) {
+		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+	list_add(&chunk->list_entry, &alloc->alloc_chunks);
+
+	err = __do_slab_alloc(a, slab, alloc);
+	if (err)
+		goto fail;
+
+	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
+		   len, slab_nr, alloc->base);
+	a->nr_slab_allocs++;
+
+	return alloc;
+
+fail:
+	kfree(alloc);
+	kfree(chunk);
+	return NULL;
+}
+
+static void __gk20a_free_slab(struct gk20a_page_allocator *a,
+			      struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = alloc->slab_page;
+	struct page_alloc_slab *slab = slab_page->owner;
+	enum slab_page_state new_state;
+	int offs;
+
+	offs = (alloc->base - slab_page->page_addr) / slab_page->slab_size;
+	bitmap_clear(&slab_page->bitmap, offs, 1);
+
+	slab_page->nr_objects_alloced--;
+
+	if (slab_page->nr_objects_alloced == 0)
+		new_state = SP_EMPTY;
+	else
+		new_state = SP_PARTIAL;
+
+	/*
+	 * Need to migrate the page to a different list.
+	 */
+	if (new_state != slab_page->state) {
+		/* Delete - can't be in empty. */
+		if (slab_page->state == SP_PARTIAL)
+			del_slab_page_from_partial(slab, slab_page);
+		else
+			del_slab_page_from_full(slab, slab_page);
+
+		/* And add. */
+		if (new_state == SP_EMPTY) {
+			if (list_empty(&slab->empty))
+				add_slab_page_to_empty(slab, slab_page);
+			else
+				free_slab_page(a, slab_page);
+		} else {
+			add_slab_page_to_partial(slab, slab_page);
+		}
+	}
+
+	/*
+	 * Now handle the page_alloc.
+	 */
+	__gk20a_free_pages(a, alloc, false);
+	a->nr_slab_frees++;
+
+	return;
+}
+
 /*
  * Allocate physical pages. Since the underlying allocator is a buddy allocator
  * the returned pages are always contiguous. However, since there could be
  * fragmentation in the space this allocator will collate smaller non-contiguous
  * allocations together if necessary.
  */
-static struct gk20a_page_alloc *__gk20a_alloc_pages(
+static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
 	struct gk20a_page_allocator *a, u64 pages)
 {
 	struct gk20a_page_alloc *alloc;
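The bucket selection in __gk20a_alloc_slab() reduces to ilog2(PAGE_ALIGN(len) >> 12): page-align the request, count 4k pages, and take the floor of the base-2 log. A standalone sketch of the arithmetic, with userspace stand-ins for PAGE_ALIGN() and ilog2() (note ilog2() floors, so the mapping is exact for power-of-two requests):

	#include <stdio.h>
	#include <stdint.h>

	#define SMALL_PAGE 4096ULL

	/* Stand-in for the kernel's PAGE_ALIGN(). */
	static uint64_t page_align(uint64_t len)
	{
		return (len + SMALL_PAGE - 1) & ~(SMALL_PAGE - 1);
	}

	/* Stand-in for ilog2(): floor(log2(v)), v > 0. */
	static int ilog2_u64(uint64_t v)
	{
		int n = -1;

		while (v) {
			v >>= 1;
			n++;
		}
		return n;
	}

	int main(void)
	{
		uint64_t lens[] = { 1, 4096, 4097, 16384, 32768 };
		unsigned int i;

		for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
			int slab_nr = ilog2_u64(page_align(lens[i]) >> 12);

			/* Bucket slab_nr holds objects of size 4k << slab_nr. */
			printf("len=%-6llu -> slab_nr=%d (%lluk objects)\n",
			       (unsigned long long)lens[i], slab_nr,
			       (4096ULL << slab_nr) / 1024);
		}
		return 0;
	}

This prints slab_nr 0 (4k) for len 1 and 4096, slab_nr 1 (8k) for 4097, slab_nr 2 (16k) for 16384, and slab_nr 3 (32k) for 32768.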
@@ -152,6 +447,8 @@ static struct gk20a_page_alloc *__gk20a_alloc_pages(
 	if (!alloc)
 		goto fail;
 
+	memset(alloc, 0, sizeof(*alloc));
+
 	INIT_LIST_HEAD(&alloc->alloc_chunks);
 	alloc->length = pages << a->page_shift;
 
@@ -233,6 +530,33 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+static struct gk20a_page_alloc *__gk20a_alloc_pages(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 pages;
+	int i = 0;
+
+	pages = ALIGN(len, a->page_size) >> a->page_shift;
+
+	alloc = __do_gk20a_alloc_pages(a, pages);
+	if (IS_ERR(alloc)) {
+		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
+			   pages << a->page_shift, pages);
+		return NULL;
+	}
+
+	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
+		   pages << a->page_shift, pages, alloc->base);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	return alloc;
+}
+
 /*
  * Allocate enough pages to satisfy @len. Page size is determined at
  * initialization of the allocator.
@@ -247,10 +571,7 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
 	struct gk20a_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
 	u64 real_len;
-	u64 pages;
-	int i = 0;
 
 	/*
 	 * If we want contig pages we have to round up to a power of two. It's
@@ -259,30 +580,24 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
 		roundup_pow_of_two(len) : len;
 
-	pages = ALIGN(real_len, a->page_size) >> a->page_shift;
-
 	alloc_lock(__a);
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
+	    real_len <= (a->page_size / 2))
+		alloc = __gk20a_alloc_slab(a, real_len);
+	else
+		alloc = __gk20a_alloc_pages(a, real_len);
 
-	alloc = __gk20a_alloc_pages(a, pages);
-	if (IS_ERR(alloc)) {
+	if (!alloc) {
 		alloc_unlock(__a);
-		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
-			   pages << a->page_shift, pages);
 		return 0;
 	}
 
 	__insert_page_alloc(a, alloc);
-	alloc_unlock(__a);
-
-	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
-		   pages << a->page_shift, pages, alloc->base);
-	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
-		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
-	}
 
 	a->nr_allocs++;
-	a->pages_alloced += pages;
+	if (real_len > a->page_size / 2)
+		a->pages_alloced += alloc->length >> a->page_shift;
+	alloc_unlock(__a);
 
 	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
 		return alloc->base;
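With GPU_ALLOC_4K_VIDMEM_PAGES set, the rewritten gk20a_page_alloc() routes anything at or below half a large page into the slabs and everything else through the buddy path; the pages_alloced counter is bumped here only for large requests because alloc_slab_page() already counts the backing page it carves up. A sketch of the decision with hypothetical request sizes, assuming a 64k page_size:

	#include <stdbool.h>
	#include <stdio.h>

	#define PAGE_SIZE_BIG 65536ULL	/* assumed large-page granularity */

	/* Mirrors: flags & GPU_ALLOC_4K_VIDMEM_PAGES && real_len <= page_size/2 */
	static bool goes_to_slab(unsigned long long real_len)
	{
		return real_len <= PAGE_SIZE_BIG / 2;
	}

	int main(void)
	{
		unsigned long long lens[] = { 4096, 32768, 32769, 65536 };
		unsigned int i;

		for (i = 0; i < 4; i++)
			printf("len=%-6llu -> %s\n", lens[i],
			       goes_to_slab(lens[i]) ? "slab" : "buddy pages");
		return 0;
	}

The 4096 and 32768 byte requests go to the slabs; 32769 and 65536 fall through to the page path.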
@@ -290,24 +605,6 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	return (u64) (uintptr_t) alloc;
 }
 
-static void __gk20a_free_pages(struct gk20a_page_allocator *a,
-			       struct gk20a_page_alloc *alloc)
-{
-	struct page_alloc_chunk *chunk;
-
-	while (!list_empty(&alloc->alloc_chunks)) {
-		chunk = list_first_entry(&alloc->alloc_chunks,
-					 struct page_alloc_chunk,
-					 list_entry);
-		list_del(&chunk->list_entry);
-
-		gk20a_free(&a->source_allocator, chunk->base);
-		kfree(chunk);
-	}
-
-	kfree(alloc);
-}
-
 /*
  * Note: this will remove the gk20a_page_alloc struct from the RB tree
  * if it's found.
@@ -331,14 +628,18 @@ static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
 	}
 
 	a->nr_frees++;
-	a->pages_freed += (alloc->length >> a->page_shift);
 
 	/*
 	 * Frees *alloc.
 	 */
-	__gk20a_free_pages(a, alloc);
+	if (alloc->slab_page) {
+		__gk20a_free_slab(a, alloc);
+	} else {
+		a->pages_freed += (alloc->length >> a->page_shift);
+		__gk20a_free_pages(a, alloc, true);
+	}
 
-	palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
+	palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
 		   alloc->length, alloc->base);
 
 done:
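On free, the only discriminator is alloc->slab_page: slab allocations hand off to __gk20a_free_slab(), which recovers the object index from the base address and clears its bitmap bit. The index math as a standalone sketch, with hypothetical addresses:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical 64k slab page backing an 8k bucket. */
		unsigned long long page_addr = 0x100010000ULL;
		unsigned long long slab_size = 8192;
		unsigned long long base = page_addr + 3 * slab_size;
		unsigned long bitmap = 0xbUL;	/* objects 0, 1, 3 in use */

		/* Mirrors __gk20a_free_slab(): which object is being freed? */
		int offs = (base - page_addr) / slab_size;

		bitmap &= ~(1UL << offs);	/* bitmap_clear(..., offs, 1) */
		printf("freed object %d, bitmap now 0x%lx\n", offs, bitmap);
		return 0;
	}

This prints "freed object 3, bitmap now 0x3", matching the bookkeeping the driver does before it migrates the slab page between lists.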
@@ -439,7 +740,7 @@ static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
 	 * allocs. This would have to be updated if the underlying
 	 * allocator were to change.
 	 */
-	__gk20a_free_pages(a, alloc);
+	__gk20a_free_pages(a, alloc, true);
 
 	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
 		   alloc->base, alloc->length);
@@ -464,6 +765,7 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 			  struct seq_file *s, int lock)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
+	int i;
 
 	if (lock)
 		alloc_lock(__a);
@@ -473,12 +775,33 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 	__alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
 	__alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
 	__alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
+	__alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
+	__alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
 	__alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
 	__alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
 	__alloc_pstat(s, __a, "\n");
+
+	/*
+	 * Slab info.
+	 */
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
+		__alloc_pstat(s, __a, "Slabs:\n");
+		__alloc_pstat(s, __a, " size empty partial full\n");
+		__alloc_pstat(s, __a, " ---- ----- ------- ----\n");
+
+		for (i = 0; i < a->nr_slabs; i++) {
+			struct page_alloc_slab *slab = &a->slabs[i];
+
+			__alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
+				      slab->slab_size,
+				      slab->nr_empty, slab->nr_partial,
+				      slab->nr_full);
+		}
+		__alloc_pstat(s, __a, "\n");
+	}
+
 	__alloc_pstat(s, __a, "Source alloc: %s\n",
 		      a->source_allocator.name);
-
 	gk20a_alloc_print_stats(&a->source_allocator, s, lock);
 
 	if (lock)
@@ -506,6 +829,43 @@ static const struct gk20a_allocator_ops page_ops = {
 	.print_stats = gk20a_page_print_stats,
 };
 
+/*
+ * nr_slabs is computed as follows: divide page_size by 4096 to get number of
+ * 4k pages in page_size. Then take the base 2 log of that to get number of
+ * slabs. For 64k page_size that works out like:
+ *
+ *   1024*64 / 1024*4 = 16
+ *   ilog2(16) = 4
+ *
+ * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
+ */
+static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
+{
+	size_t nr_slabs = ilog2(a->page_size >> 12);
+	int i;
+
+	a->slabs = kcalloc(nr_slabs,
+			   sizeof(struct page_alloc_slab),
+			   GFP_KERNEL);
+	if (!a->slabs)
+		return -ENOMEM;
+	a->nr_slabs = nr_slabs;
+
+	for (i = 0; i < nr_slabs; i++) {
+		struct page_alloc_slab *slab = &a->slabs[i];
+
+		slab->slab_size = SZ_4K * (1 << i);
+		INIT_LIST_HEAD(&slab->empty);
+		INIT_LIST_HEAD(&slab->partial);
+		INIT_LIST_HEAD(&slab->full);
+		slab->nr_empty = 0;
+		slab->nr_partial = 0;
+		slab->nr_full = 0;
+	}
+
+	return 0;
+}
+
 int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 			      const char *name, u64 base, u64 length,
 			      u64 blk_size, u64 flags)
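gk20a_page_alloc_init_slabs() sizes the bucket array from the page size alone; running the same arithmetic for both large-page sizes the comment mentions reproduces its bucket lists. A userspace sketch of the computation:

	#include <stdio.h>

	/* Stand-in for ilog2(): floor(log2(v)), v > 0. */
	static int ilog2_u32(unsigned int v)
	{
		int n = -1;

		while (v) {
			v >>= 1;
			n++;
		}
		return n;
	}

	int main(void)
	{
		unsigned int page_sizes[] = { 65536, 131072 };
		unsigned int i;
		int s;

		for (i = 0; i < 2; i++) {
			int nr_slabs = ilog2_u32(page_sizes[i] >> 12);

			printf("page_size=%uk -> %d slabs:",
			       page_sizes[i] / 1024, nr_slabs);
			for (s = 0; s < nr_slabs; s++)
				/* slab_size = SZ_4K * (1 << s) */
				printf(" %uk", (4096u << s) / 1024);
			printf("\n");
		}
		return 0;
	}

This prints 4 slabs (4k 8k 16k 32k) for a 64k page_size and 5 slabs (4k through 64k) for 128k, matching the bucket list in the header comment below.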
@@ -519,11 +879,17 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 		page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
 	if (!page_alloc_chunk_cache)
 		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
+	if (!page_alloc_slab_page_cache)
+		page_alloc_slab_page_cache =
+			KMEM_CACHE(page_alloc_slab_page, 0);
 	mutex_unlock(&meta_data_cache_lock);
 
 	if (!page_alloc_cache || !page_alloc_chunk_cache)
 		return -ENOMEM;
 
+	if (blk_size < SZ_4K)
+		return -EINVAL;
+
 	a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
 	if (!a)
 		return -ENOMEM;
@@ -540,6 +906,12 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	a->owner = __a;
 	a->flags = flags;
 
+	if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
+		err = gk20a_page_alloc_init_slabs(a);
+		if (err)
+			goto fail;
+	}
+
 	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
 
 	err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
@@ -553,6 +925,7 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	palloc_dbg(a, " size 0x%llx\n", a->length);
 	palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
 	palloc_dbg(a, " flags 0x%llx\n", a->flags);
+	palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
 
 	return 0;
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 1bacb70a..df58feb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -925,8 +925,11 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 					  bootstrap_base, bootstrap_size,
 					  SZ_4K, 0);
 
-	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-					base, size - base, default_page_size, 0);
+	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator,
+					"vidmem",
+					base, size - base,
+					default_page_size,
+					GPU_ALLOC_4K_VIDMEM_PAGES);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 			  size, err);
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
index 3d4e3c43..7d7f43c2 100644
--- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -19,12 +19,63 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include <gk20a/gk20a_allocator.h>
 
 #include "gk20a_allocator.h"
 
 struct gk20a_allocator;
 
+/*
+ * This allocator implements the ability to do SLAB style allocation since the
+ * GPU has two page sizes available - 4k and 64k/128k. When the default
+ * granularity is the large page size (64k/128k) small allocations become very
+ * space inefficient. This is most notable in PDE and PTE blocks which are 4k
+ * in size.
+ *
+ * Thus we need the ability to suballocate in 64k pages. The way we do this for
+ * the GPU is as follows. We have several buckets for sub-64K allocations:
+ *
+ *   B0 - 4k
+ *   B1 - 8k
+ *   B2 - 16k
+ *   B3 - 32k
+ *   B4 - 64k (for when large pages are 128k)
+ *
+ * When an allocation comes in for less than the large page size (from now on
+ * assumed to be 64k) the allocation is satisfied by one of the buckets.
+ */
+struct page_alloc_slab {
+	struct list_head empty;
+	struct list_head partial;
+	struct list_head full;
+
+	int nr_empty;
+	int nr_partial;
+	int nr_full;
+
+	u32 slab_size;
+};
+
+enum slab_page_state {
+	SP_EMPTY,
+	SP_PARTIAL,
+	SP_FULL,
+	SP_NONE
+};
+
+struct page_alloc_slab_page {
+	unsigned long bitmap;
+	u64 page_addr;
+	u32 slab_size;
+
+	u32 nr_objects;
+	u32 nr_objects_alloced;
+
+	enum slab_page_state state;
+
+	struct page_alloc_slab *owner;
+	struct list_head list_entry;
+};
+
 struct page_alloc_chunk {
 	struct list_head list_entry;
 
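A single unsigned long doubles as the per-page allocation bitmap above, which caps nr_objects at BITS_PER_LONG; even the worst case - 4k objects in a 128k page - needs only 32 bits. A quick userspace check of that headroom:

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		/* Smallest bucket (4k) inside the largest page (128k). */
		unsigned int max_objects = 131072 / 4096;
		unsigned int bitmap_bits = CHAR_BIT * sizeof(unsigned long);

		printf("worst case %u objects, bitmap holds %u bits\n",
		       max_objects, bitmap_bits);
		return 0;
	}

That prints 32 objects against a 64-bit bitmap on 64-bit kernels (and exactly 32 bits on 32-bit ones), so the single-word bitmap suffices for every bucket size.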
@@ -34,7 +85,7 @@ struct page_alloc_chunk {
 
 /*
  * Struct to handle internal management of page allocation. It holds a list
- * of the chunks of page that make up the overall allocation - much like a
+ * of the chunks of pages that make up the overall allocation - much like a
  * scatter gather table.
  */
 struct gk20a_page_alloc {
@@ -44,13 +95,20 @@ struct gk20a_page_alloc {
 	u64 length;
 
 	/*
-	 * Only useful for the RB tree - since the alloc will have discontiguous
+	 * Only useful for the RB tree - since the alloc may have discontiguous
 	 * pages the base is essentially irrelevant except for the fact that it
 	 * is guarenteed to be unique.
 	 */
 	u64 base;
 
 	struct rb_node tree_entry;
+
+	/*
+	 * Set if this is a slab alloc. Points back to the slab page that owns
+	 * this particular allocation. nr_chunks will always be 1 if this is
+	 * set.
+	 */
+	struct page_alloc_slab_page *slab_page;
 };
 
 struct gk20a_page_allocator {
@@ -73,6 +131,9 @@ struct gk20a_page_allocator {
 
 	struct rb_root allocs;		/* Outstanding allocations. */
 
+	struct page_alloc_slab *slabs;
+	int nr_slabs;
+
 	u64 flags;
 
 	/*
@@ -82,6 +143,8 @@ struct gk20a_page_allocator {
 	u64 nr_frees;
 	u64 nr_fixed_allocs;
 	u64 nr_fixed_frees;
+	u64 nr_slab_allocs;
+	u64 nr_slab_frees;
 	u64 pages_alloced;
 	u64 pages_freed;
 };