Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/page_allocator.c')

 -rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c | 1047
 1 file changed, 1047 insertions, 0 deletions

diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
new file mode 100644
index 00000000..d5ce5d8e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -0,0 +1,1047 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/bitops.h>
#include <nvgpu/allocator.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>

#include "buddy_allocator_priv.h"

#define palloc_dbg(a, fmt, arg...)			\
        alloc_dbg(palloc_owner(a), fmt, ##arg)

/*
 * Since some Linux headers still leak into common code, these macros may
 * already be defined for some builds. Undefine them before redefining.
 */
#ifdef PAGE_SIZE
#undef PAGE_SIZE
#endif

#ifdef PAGE_ALIGN
#undef PAGE_ALIGN
#endif

/*
 * VIDMEM page size is 4k.
 */
#define PAGE_SIZE	0x1000
#define PAGE_ALIGN(addr)	(((addr) + (PAGE_SIZE - 1)) &	\
				 ((typeof(addr)) ~(PAGE_SIZE - 1)))
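
/*
 * A quick worked example of the alignment math above (illustrative only):
 *
 *   PAGE_ALIGN(0x1001) = (0x1001 + 0xfff) & ~0xfff = 0x2000
 *   PAGE_ALIGN(0x2000) = (0x2000 + 0xfff) & ~0xfff = 0x2000 (already aligned)
 */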

/*
 * Book-keeping helpers: each one moves a slab page onto or off of one of the
 * empty/partial/full lists and updates the matching counter and page state.
 */
static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
					  struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->empty);
	slab->nr_empty++;
	page->state = SP_EMPTY;
}

static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->partial);
	slab->nr_partial++;
	page->state = SP_PARTIAL;
}

static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
					 struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->full);
	slab->nr_full++;
	page->state = SP_FULL;
}

static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_empty--;
	page->state = SP_NONE;
}

static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
					      struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_partial--;
	page->state = SP_NONE;
}

static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
					   struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_full--;
	page->state = SP_NONE;
}
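
/*
 * Slab page state transitions, for reference (derived from the helpers
 * above): a page may only enter a list from SP_NONE, and must return to
 * SP_NONE (via a del_* helper) before it can be moved to a different list:
 *
 *   SP_NONE -> SP_EMPTY / SP_PARTIAL / SP_FULL -> SP_NONE -> ...
 */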

static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_length(&va->source_allocator);
}

static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_base(&va->source_allocator);
}

static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_initialized(&va->source_allocator);
}

static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_end(&va->source_allocator);
}

static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_space(&va->source_allocator);
}

static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
				 struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
}

static void nvgpu_page_release_co(struct nvgpu_allocator *a,
				  struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	nvgpu_alloc_release_carveout(&va->source_allocator, co);
}

static void *nvgpu_page_alloc_sgl_next(void *sgl)
{
	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;

	return nvgpu_sgl->next;
}

static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
{
	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;

	return nvgpu_sgl->phys;
}

static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
{
	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;

	return nvgpu_sgl->dma;
}

static u64 nvgpu_page_alloc_sgl_length(void *sgl)
{
	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;

	return nvgpu_sgl->length;
}

static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
					 struct nvgpu_gmmu_attrs *attrs)
{
	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;

	return nvgpu_sgl->phys;
}

static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * No-op here. The free is handled by the page_alloc free() functions.
	 */
}

/*
 * These implement the generic scatter-gather ops for pages allocated by the
 * page allocator. The primary use case, however, is vidmem.
 */
static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
	.sgl_next = nvgpu_page_alloc_sgl_next,
	.sgl_phys = nvgpu_page_alloc_sgl_phys,
	.sgl_dma = nvgpu_page_alloc_sgl_dma,
	.sgl_length = nvgpu_page_alloc_sgl_length,
	.sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
	.sgt_free = nvgpu_page_alloc_sgt_free,
};
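
/*
 * For reference, consumers walk an SGT built here through the generic ops,
 * roughly like so (illustrative sketch, not code from this file):
 *
 *	void *sgl;
 *
 *	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
 *		use(nvgpu_sgt_get_phys(sgt, sgl),
 *		    nvgpu_sgt_get_length(sgt, sgl));
 */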

/*
 * This actually frees the sgl memory. Used by the page_alloc free() functions.
 */
static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
					     struct nvgpu_mem_sgl *sgl)
{
	struct nvgpu_mem_sgl *next;

	while (sgl) {
		next = sgl->next;
		nvgpu_kfree(g, sgl);
		sgl = next;
	}
}

static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
			       struct nvgpu_page_alloc *alloc,
			       bool free_buddy_alloc)
{
	struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;

	if (free_buddy_alloc) {
		while (sgl) {
			nvgpu_free(&a->source_allocator,
				   nvgpu_sgt_get_phys(&alloc->sgt, sgl));
			sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
		}
	}

	/*
	 * Free the list head, not @sgl: the loop above may have walked @sgl
	 * all the way to NULL, which would leak the sgl entries.
	 */
	nvgpu_page_alloc_sgl_proper_free(a->owner->g, alloc->sgt.sgl);
	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
}

static int __insert_page_alloc(struct nvgpu_page_allocator *a,
			       struct nvgpu_page_alloc *alloc)
{
	alloc->tree_entry.key_start = alloc->base;
	alloc->tree_entry.key_end = alloc->base + alloc->length;

	nvgpu_rbtree_insert(&alloc->tree_entry, &a->allocs);
	return 0;
}

static struct nvgpu_page_alloc *__find_page_alloc(
	struct nvgpu_page_allocator *a,
	u64 addr)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(addr, &node, a->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_page_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &a->allocs);

	return alloc;
}

static struct page_alloc_slab_page *alloc_slab_page(
	struct nvgpu_page_allocator *a,
	struct page_alloc_slab *slab)
{
	struct page_alloc_slab_page *slab_page;

	slab_page = nvgpu_kmem_cache_alloc(a->slab_page_cache);
	if (!slab_page) {
		palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
		return NULL;
	}

	memset(slab_page, 0, sizeof(*slab_page));

	slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size);
	if (!slab_page->page_addr) {
		nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
		palloc_dbg(a, "OOM: vidmem is full!\n");
		return NULL;
	}

	nvgpu_init_list_node(&slab_page->list_entry);
	slab_page->slab_size = slab->slab_size;
	slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
	slab_page->nr_objects_alloced = 0;
	slab_page->owner = slab;
	slab_page->state = SP_NONE;

	a->pages_alloced++;

	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
		   slab_page->page_addr, slab_page->slab_size);

	return slab_page;
}

static void free_slab_page(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab_page *slab_page)
{
	palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);

	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
	       slab_page->nr_objects_alloced != 0 ||
	       slab_page->bitmap != 0);

	nvgpu_free(&a->source_allocator, slab_page->page_addr);
	a->pages_freed++;

	nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
}

/*
 * This expects @alloc to have one empty SGL entry ready for use.
 */
static int __do_slab_alloc(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab *slab,
			   struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = NULL;
	struct nvgpu_mem_sgl *sgl;
	unsigned long offs;

	/*
	 * Check the partial and empty lists to see if we have some space
	 * readily available. Take the slab_page out of whatever list it
	 * was in since it may be put back into a different list later.
	 */
	if (!nvgpu_list_empty(&slab->partial)) {
		slab_page = nvgpu_list_first_entry(&slab->partial,
						   page_alloc_slab_page,
						   list_entry);
		del_slab_page_from_partial(slab, slab_page);
	} else if (!nvgpu_list_empty(&slab->empty)) {
		slab_page = nvgpu_list_first_entry(&slab->empty,
						   page_alloc_slab_page,
						   list_entry);
		del_slab_page_from_empty(slab, slab_page);
	}

	if (!slab_page) {
		slab_page = alloc_slab_page(a, slab);
		if (!slab_page)
			return -ENOMEM;
	}

	/*
	 * We now have a slab_page. Do the alloc.
	 */
	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
					  slab_page->nr_objects,
					  0, 1, 0);
	if (offs >= slab_page->nr_objects) {
		WARN(1, "Empty/partial slab with no free objects?");

		/* Add the buggy page to the full list... This isn't ideal. */
		add_slab_page_to_full(slab, slab_page);
		return -ENOMEM;
	}

	bitmap_set(&slab_page->bitmap, offs, 1);
	slab_page->nr_objects_alloced++;

	if (slab_page->nr_objects_alloced < slab_page->nr_objects)
		add_slab_page_to_partial(slab, slab_page);
	else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
		add_slab_page_to_full(slab, slab_page);
	else
		BUG(); /* Should be impossible to hit this. */

	/*
	 * Handle building the nvgpu_page_alloc struct. We expect one sgl
	 * to be present.
	 */
	alloc->slab_page = slab_page;
	alloc->nr_chunks = 1;
	alloc->length = slab_page->slab_size;
	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);

	sgl = alloc->sgt.sgl;
	sgl->phys = alloc->base;
	sgl->dma = alloc->base;
	sgl->length = alloc->length;
	sgl->next = NULL;

	return 0;
}

/*
 * Allocate from a slab instead of directly from the page allocator.
 */
static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
	struct nvgpu_page_allocator *a, u64 len)
{
	int err, slab_nr;
	struct page_alloc_slab *slab;
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_mem_sgl *sgl = NULL;

	/*
	 * Align the length to a page and then divide by the page size (4k for
	 * this code). ilog2() of that then gets us the correct slab to use.
	 */
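	/*
	 * For example, len = 6000 gives PAGE_ALIGN(6000) = 8192,
	 * 8192 >> 12 = 2 and ilog2(2) = 1, i.e. the 8k slab.
	 */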
	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
	slab = &a->slabs[slab_nr];

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	if (!alloc) {
		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
		goto fail;
	}

	alloc->sgt.ops = &page_alloc_sgl_ops;

	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
	if (!sgl) {
		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
		goto fail;
	}

	alloc->sgt.sgl = sgl;
	err = __do_slab_alloc(a, slab, alloc);
	if (err)
		goto fail;

	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
		   len, slab_nr, alloc->base);
	a->nr_slab_allocs++;

	return alloc;

fail:
	if (alloc)
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
	if (sgl)
		nvgpu_kfree(a->owner->g, sgl);
	return NULL;
}

static void __nvgpu_free_slab(struct nvgpu_page_allocator *a,
			      struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = alloc->slab_page;
	struct page_alloc_slab *slab = slab_page->owner;
	enum slab_page_state new_state;
	int offs;

	offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
	bitmap_clear(&slab_page->bitmap, offs, 1);

	slab_page->nr_objects_alloced--;

	if (slab_page->nr_objects_alloced == 0)
		new_state = SP_EMPTY;
	else
		new_state = SP_PARTIAL;

	/*
	 * Need to migrate the page to a different list.
	 */
	if (new_state != slab_page->state) {
		/* Delete - can't be in empty. */
		if (slab_page->state == SP_PARTIAL)
			del_slab_page_from_partial(slab, slab_page);
		else
			del_slab_page_from_full(slab, slab_page);

		/* And add. */
		if (new_state == SP_EMPTY) {
			if (nvgpu_list_empty(&slab->empty))
				add_slab_page_to_empty(slab, slab_page);
			else
				free_slab_page(a, slab_page);
		} else {
			add_slab_page_to_partial(slab, slab_page);
		}
	}

	/*
	 * Now handle the page_alloc.
	 */
	__nvgpu_free_pages(a, alloc, false);
	a->nr_slab_frees++;
}

/*
 * Allocate physical pages. Since the underlying allocator is a buddy
 * allocator, each returned chunk is always contiguous. However, since the
 * space may be fragmented, this allocator will stitch together multiple
 * smaller, non-contiguous allocations if necessary.
 */
static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 pages)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
	u64 max_chunk_len = pages << a->page_shift;
	int i = 0;

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	if (!alloc)
		goto fail;

	memset(alloc, 0, sizeof(*alloc));

	alloc->length = pages << a->page_shift;
	alloc->sgt.ops = &page_alloc_sgl_ops;

	while (pages) {
		u64 chunk_addr = 0;
		u64 chunk_pages = (u64)1 << __fls(pages);
		u64 chunk_len = chunk_pages << a->page_shift;
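
		/*
		 * Example of the chunking math above: for pages = 13
		 * (0b1101), __fls(13) = 3, so the first attempt is a chunk
		 * of 1 << 3 = 8 pages; later iterations would then try 4
		 * pages and finally 1 page.
		 */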

		/*
		 * Take care of the possibility that the allocation must be
		 * contiguous. If this is not the first iteration then that
		 * means the first iteration failed to alloc the entire
		 * requested size. The buddy allocator guarantees any given
		 * single alloc is contiguous.
		 */
		if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
			goto fail_cleanup;

		if (chunk_len > max_chunk_len)
			chunk_len = max_chunk_len;

		/*
		 * Keep attempting to allocate in smaller chunks until the
		 * alloc either succeeds or the chunk size drops below the
		 * page_size of the allocator (i.e. the allocator is OOM).
		 */
		do {
			chunk_addr = nvgpu_alloc(&a->source_allocator,
						 chunk_len);

			/* Divide by 2 and try again */
			if (!chunk_addr) {
				palloc_dbg(a, "balloc failed: 0x%llx\n",
					   chunk_len);
				chunk_len >>= 1;
				max_chunk_len = chunk_len;
			}
		} while (!chunk_addr && chunk_len >= a->page_size);

		chunk_pages = chunk_len >> a->page_shift;

		if (!chunk_addr) {
			palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
			goto fail_cleanup;
		}

		sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
		if (!sgl) {
			nvgpu_free(&a->source_allocator, chunk_addr);
			goto fail_cleanup;
		}

		pages -= chunk_pages;

		sgl->phys = chunk_addr;
		sgl->dma = chunk_addr;
		sgl->length = chunk_len;

		/*
		 * Build the singly linked list with a head node that is part
		 * of the list.
		 */
		if (prev_sgl)
			prev_sgl->next = sgl;
		else
			alloc->sgt.sgl = sgl;

		prev_sgl = sgl;

		i++;
	}

	alloc->nr_chunks = i;
	alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;

	return alloc;

fail_cleanup:
	sgl = alloc->sgt.sgl;
	while (sgl) {
		struct nvgpu_mem_sgl *next = sgl->next;

		nvgpu_free(&a->source_allocator, sgl->phys);
		nvgpu_kfree(a->owner->g, sgl);

		sgl = next;
	}

	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
fail:
	return NULL;
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 len)
{
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_mem_sgl *sgl;
	u64 pages;
	int i = 0;

	pages = ALIGN(len, a->page_size) >> a->page_shift;

	alloc = __do_nvgpu_alloc_pages(a, pages);
	if (!alloc) {
		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
			   pages << a->page_shift, pages);
		return NULL;
	}

	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
		   pages << a->page_shift, pages, alloc->base);
	sgl = alloc->sgt.sgl;
	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
			   i++,
			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
	}
	palloc_dbg(a, "Alloc done\n");

	return alloc;
}

/*
 * Allocate enough pages to satisfy @len. Page size is determined at
 * initialization of the allocator.
 *
 * The returned value is actually a pointer to a struct nvgpu_page_alloc, cast
 * to a u64. This is because it doesn't make much sense to return the address
 * of the first page in the list of pages (since they could be discontiguous).
 * This has precedent in the dma_alloc APIs; really, though, it's just an
 * annoying artifact of the fact that the nvgpu_alloc() API requires a u64
 * return type.
 */
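/*
 * A minimal caller-side sketch (illustrative only). Assuming an allocator
 * initialized without GPU_ALLOC_NO_SCATTER_GATHER, the returned u64 must be
 * cast back to the nvgpu_page_alloc:
 *
 *	u64 handle = nvgpu_alloc(na, SZ_64K);
 *	struct nvgpu_page_alloc *p =
 *		(struct nvgpu_page_alloc *)(uintptr_t)handle;
 *
 *	// ... walk p->sgt via the nvgpu_sgt_get_*() ops ...
 *	nvgpu_free(na, handle);
 *
 * With GPU_ALLOC_NO_SCATTER_GATHER set, the returned u64 is instead the base
 * address of the (contiguous) allocation.
 */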
static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc = NULL;
	u64 real_len;

	/*
	 * If we want contig pages we have to round up to a power of two. It's
	 * easier to do that here than in the buddy allocator.
	 */
	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
		roundup_pow_of_two(len) : len;

	alloc_lock(__a);
	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
	    real_len <= (a->page_size / 2))
		alloc = __nvgpu_alloc_slab(a, real_len);
	else
		alloc = __nvgpu_alloc_pages(a, real_len);

	if (!alloc) {
		alloc_unlock(__a);
		return 0;
	}

	__insert_page_alloc(a, alloc);

	a->nr_allocs++;
	if (real_len > a->page_size / 2)
		a->pages_alloced += alloc->length >> a->page_shift;
	alloc_unlock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		return alloc->base;
	else
		return (u64) (uintptr_t) alloc;
}

/*
 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
 * if it's found.
 */
static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		alloc = __find_page_alloc(a, base);
	else
		alloc = __find_page_alloc(a,
			((struct nvgpu_page_alloc *)(uintptr_t)base)->base);

	if (!alloc) {
		palloc_dbg(a, "Hrm, found no alloc?\n");
		goto done;
	}

	a->nr_frees++;

	palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
		   alloc->length, alloc->base);

	/*
	 * Frees *alloc.
	 */
	if (alloc->slab_page) {
		__nvgpu_free_slab(a, alloc);
	} else {
		a->pages_freed += (alloc->length >> a->page_shift);
		__nvgpu_free_pages(a, alloc, true);
	}

done:
	alloc_unlock(__a);
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_mem_sgl *sgl;

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
	if (!alloc || !sgl)
		goto fail;

	alloc->sgt.ops = &page_alloc_sgl_ops;
	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
	if (!alloc->base) {
		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
		goto fail;
	}

	alloc->nr_chunks = 1;
	alloc->length = length;
	alloc->sgt.sgl = sgl;

	sgl->phys = alloc->base;
	sgl->dma = alloc->base;
	sgl->length = length;
	sgl->next = NULL;

	return alloc;

fail:
	if (sgl)
		nvgpu_kfree(a->owner->g, sgl);
	if (alloc)
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
	return NULL;
}

/*
 * @page_size is ignored.
 */
static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
				  u64 base, u64 len, u32 page_size)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_mem_sgl *sgl;
	u64 aligned_len, pages;
	int i = 0;

	aligned_len = ALIGN(len, a->page_size);
	pages = aligned_len >> a->page_shift;

	alloc_lock(__a);

	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
	if (!alloc) {
		alloc_unlock(__a);
		return 0;
	}

	__insert_page_alloc(a, alloc);
	alloc_unlock(__a);

	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
		   alloc->base, aligned_len, pages);
	sgl = alloc->sgt.sgl;
	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
			   i++,
			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
	}

	a->nr_fixed_allocs++;
	a->pages_alloced += pages;

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		return alloc->base;
	else
		return (u64) (uintptr_t) alloc;
}

static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a,
				  u64 base, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		alloc = __find_page_alloc(a, base);
		if (!alloc)
			goto done;
	} else {
		alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
		/*
		 * Unlink the alloc from the RB tree: it was inserted at alloc
		 * time by nvgpu_page_alloc_fixed() and would otherwise be
		 * left dangling after the free below.
		 */
		__find_page_alloc(a, alloc->base);
	}

	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
		   alloc->base, alloc->length);

	a->nr_fixed_frees++;
	a->pages_freed += (alloc->length >> a->page_shift);

	/*
	 * This works for the time being since the buddy allocator
	 * uses the same free function for both fixed and regular
	 * allocs. This would have to be updated if the underlying
	 * allocator were to change.
	 */
	__nvgpu_free_pages(a, alloc, true);

done:
	alloc_unlock(__a);
}

static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);

	alloc_lock(__a);
	nvgpu_kfree(nvgpu_alloc_to_gpu(__a), a);
	__a->priv = NULL;
	alloc_unlock(__a);
}

#ifdef __KERNEL__
static void nvgpu_page_print_stats(struct nvgpu_allocator *__a,
				   struct seq_file *s, int lock)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	int i;

	if (lock)
		alloc_lock(__a);

	__alloc_pstat(s, __a, "Page allocator:\n");
	__alloc_pstat(s, __a, "  allocs         %lld\n", a->nr_allocs);
	__alloc_pstat(s, __a, "  frees          %lld\n", a->nr_frees);
	__alloc_pstat(s, __a, "  fixed_allocs   %lld\n", a->nr_fixed_allocs);
	__alloc_pstat(s, __a, "  fixed_frees    %lld\n", a->nr_fixed_frees);
	__alloc_pstat(s, __a, "  slab_allocs    %lld\n", a->nr_slab_allocs);
	__alloc_pstat(s, __a, "  slab_frees     %lld\n", a->nr_slab_frees);
	__alloc_pstat(s, __a, "  pages alloced  %lld\n", a->pages_alloced);
	__alloc_pstat(s, __a, "  pages freed    %lld\n", a->pages_freed);
	__alloc_pstat(s, __a, "\n");

	__alloc_pstat(s, __a, "Page size:       %lld KB\n",
		      a->page_size >> 10);
	__alloc_pstat(s, __a, "Total pages:     %lld (%lld MB)\n",
		      a->length / a->page_size,
		      a->length >> 20);
	__alloc_pstat(s, __a, "Available pages: %lld (%lld MB)\n",
		      nvgpu_alloc_space(&a->source_allocator) / a->page_size,
		      nvgpu_alloc_space(&a->source_allocator) >> 20);
	__alloc_pstat(s, __a, "\n");

	/*
	 * Slab info.
	 */
	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
		__alloc_pstat(s, __a, "Slabs:\n");
		__alloc_pstat(s, __a, "  size      empty     partial   full\n");
		__alloc_pstat(s, __a, "  ----      -----     -------   ----\n");

		for (i = 0; i < a->nr_slabs; i++) {
			struct page_alloc_slab *slab = &a->slabs[i];

			__alloc_pstat(s, __a, "  %-9u %-9d %-9u %u\n",
				      slab->slab_size,
				      slab->nr_empty, slab->nr_partial,
				      slab->nr_full);
		}
		__alloc_pstat(s, __a, "\n");
	}

	__alloc_pstat(s, __a, "Source alloc: %s\n",
		      a->source_allocator.name);
	nvgpu_alloc_print_stats(&a->source_allocator, s, lock);

	if (lock)
		alloc_unlock(__a);
}
#endif

static const struct nvgpu_allocator_ops page_ops = {
	.alloc = nvgpu_page_alloc,
	.free = nvgpu_page_free,

	.alloc_fixed = nvgpu_page_alloc_fixed,
	.free_fixed = nvgpu_page_free_fixed,

	.reserve_carveout = nvgpu_page_reserve_co,
	.release_carveout = nvgpu_page_release_co,

	.base = nvgpu_page_alloc_base,
	.length = nvgpu_page_alloc_length,
	.end = nvgpu_page_alloc_end,
	.inited = nvgpu_page_alloc_inited,
	.space = nvgpu_page_alloc_space,

	.fini = nvgpu_page_allocator_destroy,

#ifdef __KERNEL__
	.print_stats = nvgpu_page_print_stats,
#endif
};

/*
 * nr_slabs is computed as follows: divide page_size by 4096 to get the number
 * of 4k pages in page_size. Then take the base 2 log of that to get the
 * number of slabs. For a 64k page_size that works out like:
 *
 *   1024*64 / 1024*4 = 16
 *   ilog2(16) = 4
 *
 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
 */
static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
{
	size_t nr_slabs = ilog2(a->page_size >> 12);
	unsigned int i;

	a->slabs = nvgpu_kcalloc(nvgpu_alloc_to_gpu(a->owner),
				 nr_slabs,
				 sizeof(struct page_alloc_slab));
	if (!a->slabs)
		return -ENOMEM;
	a->nr_slabs = nr_slabs;

	for (i = 0; i < nr_slabs; i++) {
		struct page_alloc_slab *slab = &a->slabs[i];

		slab->slab_size = SZ_4K * (1 << i);
		nvgpu_init_list_node(&slab->empty);
		nvgpu_init_list_node(&slab->partial);
		nvgpu_init_list_node(&slab->full);
		slab->nr_empty = 0;
		slab->nr_partial = 0;
		slab->nr_full = 0;
	}

	return 0;
}
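
/*
 * Example of bringing up a page allocator over a hypothetical 1 GB vidmem
 * region with 64k large pages (the names and values here are illustrative,
 * not from this file):
 *
 *	struct nvgpu_allocator na;
 *	int err;
 *
 *	err = nvgpu_page_allocator_init(g, &na, "vidmem-example",
 *					base, SZ_1G, SZ_64K,
 *					GPU_ALLOC_4K_VIDMEM_PAGES);
 *
 * With a 64k page size and GPU_ALLOC_4K_VIDMEM_PAGES set, this creates the
 * four slabs (4k..32k) described above; allocs of at most page_size/2 are
 * then served from slabs, larger ones from the buddy allocator directly.
 */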

int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
			      const char *name, u64 base, u64 length,
			      u64 blk_size, u64 flags)
{
	struct nvgpu_page_allocator *a;
	char buddy_name[sizeof(__a->name)];
	int err;

	if (blk_size < SZ_4K)
		return -EINVAL;

	a = nvgpu_kzalloc(g, sizeof(struct nvgpu_page_allocator));
	if (!a)
		return -ENOMEM;

	err = __nvgpu_alloc_common_init(__a, g, name, a, false, &page_ops);
	if (err)
		goto fail;

	a->alloc_cache = nvgpu_kmem_cache_create(g,
		sizeof(struct nvgpu_page_alloc));
	a->slab_page_cache = nvgpu_kmem_cache_create(g,
		sizeof(struct page_alloc_slab_page));
	if (!a->alloc_cache || !a->slab_page_cache) {
		err = -ENOMEM;
		goto fail;
	}

	a->base = base;
	a->length = length;
	a->page_size = blk_size;
	a->page_shift = __ffs(blk_size);
	a->allocs = NULL;
	a->owner = __a;
	a->flags = flags;

	if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
		err = nvgpu_page_alloc_init_slabs(a);
		if (err)
			goto fail;
	}

	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);

	err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name,
					 base, length, blk_size, 0);
	if (err)
		goto fail;

#ifdef CONFIG_DEBUG_FS
	nvgpu_init_alloc_debug(g, __a);
#endif
	palloc_dbg(a, "New allocator: type      page\n");
	palloc_dbg(a, "               base      0x%llx\n", a->base);
	palloc_dbg(a, "               size      0x%llx\n", a->length);
	palloc_dbg(a, "               page_size 0x%llx\n", a->page_size);
	palloc_dbg(a, "               flags     0x%llx\n", a->flags);
	palloc_dbg(a, "               slabs:    %d\n", a->nr_slabs);

	return 0;

fail:
	if (a->alloc_cache)
		nvgpu_kmem_cache_destroy(a->alloc_cache);
	if (a->slab_page_cache)
		nvgpu_kmem_cache_destroy(a->slab_page_cache);
	nvgpu_kfree(g, a);
	return err;
}