path: root/include/os/linux/kmem.c
Diffstat (limited to 'include/os/linux/kmem.c')
-rw-r--r--	include/os/linux/kmem.c	653
1 files changed, 653 insertions, 0 deletions
diff --git a/include/os/linux/kmem.c b/include/os/linux/kmem.c
new file mode 100644
index 0000000..395cc45
--- /dev/null
+++ b/include/os/linux/kmem.c
@@ -0,0 +1,653 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>

#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>

#include "kmem_priv.h"

/*
 * Statically declared because this needs to be shared across all nvgpu driver
 * instances. This makes sure that all kmem caches are _definitely_ uniquely
 * named.
 */
static atomic_t kmem_cache_id;

void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
{
	void *p;

	if (size > PAGE_SIZE) {
		if (clear)
			p = nvgpu_vzalloc(g, size);
		else
			p = nvgpu_vmalloc(g, size);
	} else {
		if (clear)
			p = nvgpu_kzalloc(g, size);
		else
			p = nvgpu_kmalloc(g, size);
	}

	return p;
}

void nvgpu_big_free(struct gk20a *g, void *p)
{
	/*
	 * This will have to be fixed eventually. Allocs that use
	 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
	 * when freeing.
	 */
	if (is_vmalloc_addr(p))
		nvgpu_vfree(g, p);
	else
		nvgpu_kfree(g, p);
}
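
/*
 * Usage sketch (illustrative only; assumes a valid struct gk20a pointer and
 * the nvgpu_big_malloc()/nvgpu_big_zalloc() wrappers referred to above,
 * declared in <nvgpu/kmem.h>):
 *
 *	u32 *map = nvgpu_big_zalloc(g, nr_entries * sizeof(*map));
 *
 *	if (!map)
 *		return -ENOMEM;
 *	...
 *	nvgpu_big_free(g, map);
 *
 * The PAGE_SIZE threshold in __nvgpu_big_alloc() means small buffers come
 * from kmalloc() and large ones from vmalloc(), without the caller having to
 * choose.
 */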

void *__nvgpu_kmalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
	alloc = kmalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kzalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
	alloc = kzalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
	alloc = kcalloc(n, size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 n * size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
	alloc = vmalloc(size);
#endif

	kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
	alloc = vzalloc(size);
#endif

	kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void __nvgpu_kfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_kfree(g, addr);
#else
	kfree(addr);
#endif
}

void __nvgpu_vfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_vfree(g, addr);
#else
	vfree(addr);
#endif
}
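
/*
 * Usage sketch (illustrative only; assumes the nvgpu_kzalloc()/nvgpu_kfree()
 * wrappers declared in <nvgpu/kmem.h>, which are assumed to supply the
 * caller's address for the @ip argument used by the tracking code below;
 * "struct my_state" is a hypothetical caller-side type):
 *
 *	struct my_state *s = nvgpu_kzalloc(g, sizeof(*s));
 *
 *	if (!s)
 *		return -ENOMEM;
 *	...
 *	nvgpu_kfree(g, s);
 *
 * When CONFIG_NVGPU_TRACK_MEM_USAGE is enabled, each such call is recorded
 * by the tracking code below; otherwise it falls straight through to the
 * kernel allocators.
 */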

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE

void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_acquire(&tracker->lock);
}

void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_release(&tracker->lock);
}

void kmem_print_mem_alloc(struct gk20a *g,
			  struct nvgpu_mem_alloc *alloc,
			  struct seq_file *s)
{
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	int i;

	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
		alloc->addr, alloc->size);
	for (i = 0; i < alloc->stack_length; i++)
		__pstat(s, " %3d [<%p>] %pS\n", i,
			(void *)alloc->stack[i],
			(void *)alloc->stack[i]);
	__pstat(s, "\n");
#else
	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
		alloc->addr, alloc->size, alloc->ip);
#endif
}

static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
			   struct nvgpu_mem_alloc *alloc)
{
	alloc->allocs_entry.key_start = alloc->addr;
	alloc->allocs_entry.key_end = alloc->addr + alloc->size;

	nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
	return 0;
}

static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
	struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
	struct nvgpu_mem_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_mem_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &tracker->allocs);

	return alloc;
}
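
/*
 * The tracker's rbtree is keyed by the [addr, addr + size) interval of each
 * allocation, so a tracked allocation can be looked up by its base address.
 * A minimal lookup sketch, mirroring the calls used above (illustrative
 * only; assumes the tracker lock is taken as in the save/free paths):
 *
 *	struct nvgpu_rbtree_node *node = NULL;
 *	struct nvgpu_mem_alloc *a;
 *
 *	nvgpu_lock_tracker(tracker);
 *	nvgpu_rbtree_search(addr, &node, tracker->allocs);
 *	a = node ? nvgpu_mem_alloc_from_rbtree_node(node) : NULL;
 *	if (a)
 *		kmem_print_mem_alloc(g, a, NULL);
 *	nvgpu_unlock_tracker(tracker);
 */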

static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   unsigned long size, unsigned long real_size,
				   u64 addr, void *ip)
{
	int ret;
	struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	struct stack_trace stack_trace;
#endif

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	alloc->owner = tracker;
	alloc->size = size;
	alloc->real_size = real_size;
	alloc->addr = addr;
	alloc->ip = ip;

#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	stack_trace.max_entries = MAX_STACK_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = alloc->stack;
	/*
	 * This 4 here skips the 2 function calls that happen for all traced
	 * allocs due to nvgpu:
	 *
	 *   __nvgpu_save_kmem_alloc+0x7c/0x128
	 *   __nvgpu_track_kzalloc+0xcc/0xf8
	 *
	 * And the function calls that get made by the stack trace code itself.
	 * If the trace saving code changes this will likely have to change
	 * as well.
	 */
	stack_trace.skip = 4;
	save_stack_trace(&stack_trace);
	alloc->stack_length = stack_trace.nr_entries;
#endif

	nvgpu_lock_tracker(tracker);
	tracker->bytes_alloced += size;
	tracker->bytes_alloced_real += real_size;
	tracker->nr_allocs++;

	/* Keep track of this for building a histogram later on. */
	if (tracker->max_alloc < size)
		tracker->max_alloc = size;
	if (tracker->min_alloc > size)
		tracker->min_alloc = size;

	ret = nvgpu_add_alloc(tracker, alloc);
	if (ret) {
		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
		kfree(alloc);
		nvgpu_unlock_tracker(tracker);
		return ret;
	}
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   u64 addr)
{
	struct nvgpu_mem_alloc *alloc;

	nvgpu_lock_tracker(tracker);
	alloc = nvgpu_rem_alloc(tracker, addr);
	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
		nvgpu_unlock_tracker(tracker);
		return -EINVAL;
	}

	memset((void *)alloc->addr, 0, alloc->size);

	tracker->nr_frees++;
	tracker->bytes_freed += alloc->size;
	tracker->bytes_freed_real += alloc->real_size;
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static void __nvgpu_check_valloc_size(unsigned long size)
{
	WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}

static void __nvgpu_check_kalloc_size(size_t size)
{
	WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}

void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
			    void *ip)
{
	void *alloc = vmalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return value. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
			    void *ip)
{
	void *alloc = vzalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return value. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc = kmalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc = kzalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
			    void *ip)
{
	void *alloc = kcalloc(n, size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(n * size);

	__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
				roundup_pow_of_two(n * size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
	/*
	 * Often it is accepted practice to pass NULL pointers into free
	 * functions to save code.
	 */
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);

	vfree(addr);
}

void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);

	kfree(addr);
}

static int __do_check_for_outstanding_allocs(
	struct gk20a *g,
	struct nvgpu_mem_alloc_tracker *tracker,
	const char *type, bool silent)
{
	struct nvgpu_rbtree_node *node;
	int count = 0;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (!silent)
			kmem_print_mem_alloc(g, alloc, NULL);

		count++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	return count;
}

/**
 * check_for_outstanding_allocs - Count and display outstanding allocs
 *
 * @g - The GPU.
 * @silent - If set don't print anything about the allocs.
 *
 * Dump (or just count) the number of allocations left outstanding.
 */
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
	int count = 0;

	count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
						   silent);
	count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
						   silent);

	return count;
}

static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
				  void (*force_free_func)(const void *))
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (force_free_func)
			force_free_func((void *)alloc->addr);

		nvgpu_rbtree_unlink(node, &tracker->allocs);
		kfree(alloc);

		nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	}
}

/**
 * nvgpu_kmem_cleanup - Clean up the kmem tracking
 *
 * @g - The GPU.
 * @force_free - If set will also free leaked objects if possible.
 *
 * Clean up all of the allocs made by the nvgpu_kmem tracking code. If
 * @force_free is non-zero then the allocations made by nvgpu are also freed.
 * This is risky, though, as it is possible that the memory is still in use
 * by other parts of the GPU driver not aware that this has happened.
 *
 * In theory it should be fine if the GPU driver has been deinitialized and
 * there are no bugs in that code. However, if there are any bugs in that code
 * then they could manifest as odd crashes an indeterminate amount of time in
 * the future. So use @force_free at your own risk.
 */
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
	int count;
	bool silent, force_free;

	if (!flags)
		return;

	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);

	count = check_for_outstanding_allocs(g, silent);
	nvgpu_kmem_cleanup(g, force_free);

	/*
	 * If we leak objects we can either BUG() out or just WARN(). In
	 * general it doesn't make sense to BUG() here since leaking a few
	 * objects won't crash the kernel, but it can be helpful for
	 * development.
	 *
	 * If neither flag is set then we just silently do nothing.
	 */
	if (count > 0) {
		if (flags & NVGPU_KMEM_FINI_WARN) {
			WARN(1, "Letting %d allocs leak!!\n", count);
		} else if (flags & NVGPU_KMEM_FINI_BUG) {
			nvgpu_err(g, "Letting %d allocs leak!!", count);
			BUG();
		}
	}
}
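
/*
 * Teardown usage sketch (illustrative only; a typical driver-remove path
 * might dump and warn about leaks without force-freeing them):
 *
 *	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
 *			   NVGPU_KMEM_FINI_WARN);
 *
 * Adding NVGPU_KMEM_FINI_FORCE_CLEANUP additionally frees the leaked
 * allocations themselves, with the caveats described above
 * nvgpu_kmem_cleanup().
 */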

int nvgpu_kmem_init(struct gk20a *g)
{
	int err;

	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);

	if (!g->vmallocs || !g->kmallocs) {
		err = -ENOMEM;
		goto fail;
	}

	g->vmallocs->name = "vmalloc";
	g->kmallocs->name = "kmalloc";

	g->vmallocs->allocs = NULL;
	g->kmallocs->allocs = NULL;

	nvgpu_mutex_init(&g->vmallocs->lock);
	nvgpu_mutex_init(&g->kmallocs->lock);

	g->vmallocs->min_alloc = PAGE_SIZE;
	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;

	/*
	 * This needs to go after all the other initialization since the cache
	 * creation uses the nvgpu_kzalloc() API.
	 */
	g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
						sizeof(struct nvgpu_mem_alloc));
	g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
						sizeof(struct nvgpu_mem_alloc));

	if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
		err = -ENOMEM;
		if (g->vmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
		if (g->kmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
		goto fail;
	}

	return 0;

fail:
	if (g->vmallocs)
		kfree(g->vmallocs);
	if (g->kmallocs)
		kfree(g->kmallocs);
	return err;
}

#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */

int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */

struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
	struct nvgpu_kmem_cache *cache =
		nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));

	if (!cache)
		return NULL;

	cache->g = g;

	snprintf(cache->name, sizeof(cache->name),
		 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
		 atomic_inc_return(&kmem_cache_id));
	cache->cache = kmem_cache_create(cache->name,
					 size, size, 0, NULL);
	if (!cache->cache) {
		nvgpu_kfree(g, cache);
		return NULL;
	}

	return cache;
}

void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
	struct gk20a *g = cache->g;

	kmem_cache_destroy(cache->cache);
	nvgpu_kfree(g, cache);
}

void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
	return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}

void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
	kmem_cache_free(cache->cache, ptr);
}
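
/*
 * Cache lifecycle sketch (illustrative only; "struct my_obj" is a
 * hypothetical fixed-size object used for the example):
 *
 *	struct nvgpu_kmem_cache *cache;
 *	struct my_obj *obj;
 *
 *	cache = nvgpu_kmem_cache_create(g, sizeof(struct my_obj));
 *	if (!cache)
 *		return -ENOMEM;
 *
 *	obj = nvgpu_kmem_cache_alloc(cache);
 *	if (obj) {
 *		...
 *		nvgpu_kmem_cache_free(cache, obj);
 *	}
 *
 *	nvgpu_kmem_cache_destroy(cache);
 *
 * The kmem_cache_id counter declared at the top of this file keeps the
 * underlying kmem_cache names unique even when several nvgpu instances
 * create caches of the same size.
 */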