Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/kmem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c | 806
1 file changed, 793 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index 24e0ca5d..60e79348 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -15,11 +15,22 @@
15 */
16
17#include <linux/kernel.h>
18#include <linux/mutex.h>
19#include <linux/slab.h>
20#include <linux/atomic.h>
21#include <linux/rbtree.h>
22#include <linux/debugfs.h>
23#include <linux/spinlock.h>
24#include <linux/seq_file.h>
25#include <linux/vmalloc.h>
26#include <linux/stacktrace.h>
27
28#include <nvgpu/kmem.h>
29
30#include "gk20a/gk20a.h"
31
32#include "kmem_priv.h"
33
34/*
35 * Statically declared because this needs to be shared across all nvgpu driver
36 * instances. This makes sure that all kmem caches are _definitely_ uniquely
@@ -27,26 +38,793 @@
38 */
39static atomic_t kmem_cache_id;
40
-/*
- * Linux specific version of the nvgpu_kmem_cache struct. This type is
- * completely opaque to the rest of the driver.
41#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
42
43static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
44{
45 mutex_lock(&tracker->lock);
46}
47
48static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
49{
50 mutex_unlock(&tracker->lock);
51}
52
53static void kmem_print_mem_alloc(struct gk20a *g,
54 struct nvgpu_mem_alloc *alloc,
55 struct seq_file *s)
56{
57#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
58 int i;
59
60 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
61 alloc->addr, alloc->size);
62 for (i = 0; i < alloc->stack_length; i++)
63 __pstat(s, " %3d [<%p>] %pS\n", i,
64 (void *)alloc->stack[i],
65 (void *)alloc->stack[i]);
66 __pstat(s, "\n");
67#else
68 __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
69 alloc->addr, alloc->size, alloc->ip);
70#endif
71}
72
73static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
74 struct nvgpu_mem_alloc *alloc)
75{
76 struct rb_node **new = &tracker->allocs.rb_node;
77 struct rb_node *parent = NULL;
78
79 while (*new) {
80 struct nvgpu_mem_alloc *tmp = rb_entry(*new,
81 struct nvgpu_mem_alloc,
82 allocs_entry);
83
84 parent = *new;
85
86 if (alloc->addr < tmp->addr)
87 new = &(*new)->rb_left;
88 else if (alloc->addr > tmp->addr)
89 new = &(*new)->rb_right;
90 else
91 return -EINVAL;
92 }
93
94 /* Put the new node there */
95 rb_link_node(&alloc->allocs_entry, parent, new);
96 rb_insert_color(&alloc->allocs_entry, &tracker->allocs);
97
98 return 0;
99}
100
101static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
102 struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
103{
104 struct rb_node *node = tracker->allocs.rb_node;
105 struct nvgpu_mem_alloc *alloc;
106
107 while (node) {
108 alloc = container_of(node,
109 struct nvgpu_mem_alloc, allocs_entry);
110
111 if (alloc_addr < alloc->addr)
112 node = node->rb_left;
113 else if (alloc_addr > alloc->addr)
114 node = node->rb_right;
115 else
116 break;
117 }
118
119 if (!node)
120 return NULL;
121
122 rb_erase(node, &tracker->allocs);
123
124 return alloc;
125}
126
127static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
128 unsigned long size, unsigned long real_size,
129 u64 addr, unsigned long ip)
130{
131 int ret;
132 struct nvgpu_mem_alloc *alloc;
133#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
134 struct stack_trace stack_trace;
135#endif
136
137 alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
138 if (!alloc)
139 return -ENOMEM;
140
141 alloc->owner = tracker;
142 alloc->size = size;
143 alloc->real_size = real_size;
144 alloc->addr = addr;
145 alloc->ip = (void *)(uintptr_t)ip;
146
147#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
148 stack_trace.max_entries = MAX_STACK_TRACE;
149 stack_trace.nr_entries = 0;
150 stack_trace.entries = alloc->stack;
151 /*
152 * This 4 here skips the 2 function calls that happen for all traced
153 * allocs due to nvgpu:
154 *
155 * __nvgpu_save_kmem_alloc+0x7c/0x128
156 * __nvgpu_track_kzalloc+0xcc/0xf8
157 *
158 * And the function calls that get made by the stack trace code itself.
159 * If the trace savings code changes this will likely have to change
160 * as well.
161 */
162 stack_trace.skip = 4;
163 save_stack_trace(&stack_trace);
164 alloc->stack_length = stack_trace.nr_entries;
165#endif
166
167 lock_tracker(tracker);
168 tracker->bytes_alloced += size;
169 tracker->bytes_alloced_real += real_size;
170 tracker->nr_allocs++;
171
172 /* Keep track of this for building a histogram later on. */
173 if (tracker->max_alloc < size)
174 tracker->max_alloc = size;
175 if (tracker->min_alloc > size)
176 tracker->min_alloc = size;
177
178 ret = nvgpu_add_alloc(tracker, alloc);
179 if (ret) {
180 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
181 kfree(alloc);
182 unlock_tracker(tracker);
183 return ret;
184 }
185 unlock_tracker(tracker);
186
187 return 0;
188}
189
190static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
191 u64 addr)
192{
193 struct nvgpu_mem_alloc *alloc;
194
195 lock_tracker(tracker);
196 alloc = nvgpu_rem_alloc(tracker, addr);
197 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
198 unlock_tracker(tracker);
199 return -EINVAL;
200 }
201
202 tracker->nr_frees++;
203 tracker->bytes_freed += alloc->size;
204 tracker->bytes_freed_real += alloc->real_size;
205 unlock_tracker(tracker);
206
207 return 0;
208}
209
210static void __nvgpu_check_valloc_size(unsigned long size)
211{
212 WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
213}
214
215static void __nvgpu_check_kalloc_size(size_t size)
216{
217 WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
218}
219
220void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
221 unsigned long ip)
222{
223 void *alloc = vmalloc(size);
224
225 if (!alloc)
226 return NULL;
227
228 kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc);
229 __nvgpu_check_valloc_size(size);
230
231 /*
232	 * Ignore the return value. If this fails, let's not cause any issues
233 * for the rest of the driver.
234 */
235 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
236 (u64)(uintptr_t)alloc, ip);
237
238 return alloc;
239}
240
241void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
242 unsigned long ip)
243{
244 void *alloc = vzalloc(size);
245
246 if (!alloc)
247 return NULL;
248
249 kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc);
250 __nvgpu_check_valloc_size(size);
251
252 /*
253	 * Ignore the return value. If this fails, let's not cause any issues
254 * for the rest of the driver.
255 */
256 __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
257 (u64)(uintptr_t)alloc, ip);
258
259 return alloc;
260}
261
262void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
263{
264 void *alloc = kmalloc(size, GFP_KERNEL);
265
266 if (!alloc)
267 return NULL;
268
269 kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
270 size, alloc, GFP_KERNEL);
271 __nvgpu_check_kalloc_size(size);
272
273 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
274 (u64)(uintptr_t)alloc, ip);
275
276 return alloc;
277}
278
279void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
280{
281 void *alloc = kzalloc(size, GFP_KERNEL);
282
283 if (!alloc)
284 return NULL;
285
286 kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
287 size, alloc, GFP_KERNEL);
288 __nvgpu_check_kalloc_size(size);
289
290 __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
291 (u64)(uintptr_t)alloc, ip);
292
293 return alloc;
294}
295
296void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
297 unsigned long ip)
298{
299 void *alloc = kcalloc(n, size, GFP_KERNEL);
300
301 if (!alloc)
302 return NULL;
303
304 kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
305 n * size, alloc, GFP_KERNEL);
306 __nvgpu_check_kalloc_size(n * size);
307
308 __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
309 roundup_pow_of_two(n * size),
310 (u64)(uintptr_t)alloc, ip);
311
312 return alloc;
313}
314
315void __nvgpu_track_vfree(struct gk20a *g, void *addr)
316{
317 /*
318 * Often it is accepted practice to pass NULL pointers into free
319 * functions to save code.
320 */
321 if (!addr)
322 return;
323
324 vfree(addr);
325
326 kmem_dbg("vfree: addr=0x%p", addr);
327
328 __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
329}
330
331void __nvgpu_track_kfree(struct gk20a *g, void *addr)
332{
333 if (!addr)
334 return;
335
336 kfree(addr);
337
338 kmem_dbg("kfree: addr=0x%p", addr);
339
340 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
341}
342
343/**
344 * to_human_readable_bytes - Determine suffix for passed size.
345 *
346 * @bytes - Number of bytes to generate a suffix for.
347 * @hr_bytes [out] - The human readable number of bytes.
348 * @hr_suffix [out] - The suffix for the HR number of bytes.
349 *
350 * Computes a human readable decomposition of the passed number of bytes. The
351 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
352 * number of bytes is then passed back in @hr_bytes. This returns the following
353 * ranges:
354 *
355 * 0 - 1023 B
356 * 1 - 1023 KB
357 * 1 - 1023 MB
358 * 1 - 1023 GB
359 * 1 - 1023 TB
360 * 1 - ... PB
361 */
362static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
363 const char **hr_suffix)
364{
365 static const char *suffixes[] =
366 { "B", "KB", "MB", "GB", "TB", "PB" };
367
368 u64 suffix_ind = 0;
369
370 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
371 bytes >>= 10;
372 suffix_ind++;
373 }
374
375 /*
376 * Handle case where bytes > 1023PB.
377 */
378 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
379 suffix_ind : ARRAY_SIZE(suffixes) - 1;
380
381 *hr_bytes = bytes;
382 *hr_suffix = suffixes[suffix_ind];
383}
384
385/**
386 * print_hr_bytes - Print human readable bytes
387 *
388 * @s - A seq_file to print to. May be NULL.
389 * @msg - A message to print before the bytes.
390 * @bytes - Number of bytes.
391 *
392 * Print @msg followed by the human readable decomposition of the passed number
393 * of bytes.
394 *
395 * If @s is NULL then the prints will be made to the kernel log.
396 */
397static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
398{
399 u64 hr_bytes;
400 const char *hr_suffix;
401
402 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
403 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
404}
405
406/**
407 * print_histogram - Build a histogram of the memory usage.
408 *
409 * @tracker The tracking to pull data from.
410 * @s A seq_file to dump info into.
411 */
-struct nvgpu_kmem_cache {
- struct gk20a *g;
- struct kmem_cache *cache;
412static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
413		struct seq_file *s)
414{
415 int i;
416 u64 pot_min, pot_max;
417 u64 nr_buckets;
418 unsigned int *buckets;
419 unsigned int total_allocs;
420 struct rb_node *node;
421 static const char histogram_line[] =
422 "++++++++++++++++++++++++++++++++++++++++";
423
424 /*
425 * pot_min is essentially a round down to the nearest power of 2. This
426 * is the start of the histogram. pot_max is just a round up to the
427 * nearest power of two. Each histogram bucket is one power of two so
428 * the histogram buckets are exponential.
429 */
430 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
431 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
432
433 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
434
435 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
436 if (!buckets) {
437 __pstat(s, "OOM: could not allocate bucket storage!?\n");
438 return;
439 }
440
- /*
-  * Memory to hold the kmem_cache unique name. Only necessary on our
-  * k3.10 kernel when not using the SLUB allocator but it's easier to
-  * just carry this on to newer kernels.
441	/*
442	 * Iterate across all of the allocs and determine what bucket they
443	 * should go in. Round the size down to the nearest power of two to
444	 * find the right bucket.
445	 */
- char name[128];
446	for (node = rb_first(&tracker->allocs);
447 node != NULL;
448 node = rb_next(node)) {
449 int b;
450 u64 bucket_min;
451 struct nvgpu_mem_alloc *alloc;
452
453 alloc = container_of(node, struct nvgpu_mem_alloc,
454 allocs_entry);
455 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
456 if (bucket_min < tracker->min_alloc)
457 bucket_min = tracker->min_alloc;
458
459 b = __ffs(bucket_min) - __ffs(pot_min);
460
461 /*
462		 * Handle the one case where there's an alloc exactly as big as
463 * the maximum bucket size of the largest bucket. Most of the
464 * buckets have an inclusive minimum and exclusive maximum. But
465 * the largest bucket needs to have an _inclusive_ maximum as
466 * well.
467 */
468 if (b == (int)nr_buckets)
469 b--;
470
471 buckets[b]++;
472 }
473
474 total_allocs = 0;
475 for (i = 0; i < (int)nr_buckets; i++)
476 total_allocs += buckets[i];
477
478 __pstat(s, "Alloc histogram:\n");
479
480 /*
481 * Actually compute the histogram lines.
482 */
483 for (i = 0; i < (int)nr_buckets; i++) {
484 char this_line[sizeof(histogram_line) + 1];
485 u64 line_length;
486 u64 hr_bytes;
487 const char *hr_suffix;
488
489 memset(this_line, 0, sizeof(this_line));
490
491 /*
492		 * Compute the normalized line length. Can't use floating point
493 * so we will just multiply everything by 1000 and use fixed
494 * point.
495 */
496 line_length = (1000 * buckets[i]) / total_allocs;
497 line_length *= sizeof(histogram_line);
498 line_length /= 1000;
499
500 memset(this_line, '+', line_length);
501
502 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
503 &hr_bytes, &hr_suffix);
504 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
505 hr_bytes, hr_bytes << 1,
506 hr_suffix, buckets[i], this_line);
507 }
508}
509
510/**
511 * nvgpu_kmem_print_stats - Print kmem tracking stats.
512 *
513 * @tracker The tracking to pull data from.
514 * @s A seq_file to dump info into.
515 *
516 * Print stats from a tracker. If @s is non-null then seq_printf() will be
517 * used with @s. Otherwise the stats are pr_info()ed.
518 */
519void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
520 struct seq_file *s)
521{
522 lock_tracker(tracker);
523
524 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
525
526 __pstat(s, "Basic Stats:\n");
527 __pstat(s, " Number of allocs %lld\n",
528 tracker->nr_allocs);
529 __pstat(s, " Number of frees %lld\n",
530 tracker->nr_frees);
531 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
532 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
533 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
534 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
535 print_hr_bytes(s, " Bytes allocated (real) ",
536 tracker->bytes_alloced_real);
537 print_hr_bytes(s, " Bytes freed (real) ",
538 tracker->bytes_freed_real);
539 __pstat(s, "\n");
540
541 print_histogram(tracker, s);
542
543 unlock_tracker(tracker);
544}
545
546#if defined(CONFIG_DEBUG_FS)
547static int __kmem_tracking_show(struct seq_file *s, void *unused)
548{
549 struct nvgpu_mem_alloc_tracker *tracker = s->private;
550
551 nvgpu_kmem_print_stats(tracker, s);
552
553 return 0;
554}
555
556static int __kmem_tracking_open(struct inode *inode, struct file *file)
557{
558 return single_open(file, __kmem_tracking_show, inode->i_private);
559}
560
561static const struct file_operations __kmem_tracking_fops = {
562 .open = __kmem_tracking_open,
563 .read = seq_read,
564 .llseek = seq_lseek,
565 .release = single_release,
566};
567
568static int __kmem_traces_dump_tracker(struct gk20a *g,
569 struct nvgpu_mem_alloc_tracker *tracker,
570 struct seq_file *s)
571{
572 struct rb_node *node;
573
574 for (node = rb_first(&tracker->allocs);
575 node != NULL;
576 node = rb_next(node)) {
577 struct nvgpu_mem_alloc *alloc;
578
579 alloc = container_of(node, struct nvgpu_mem_alloc,
580 allocs_entry);
581
582 kmem_print_mem_alloc(g, alloc, s);
583 }
584
585 return 0;
586}
587
588static int __kmem_traces_show(struct seq_file *s, void *unused)
589{
590 struct gk20a *g = s->private;
591
592 lock_tracker(g->vmallocs);
593	seq_puts(s, "Outstanding vmallocs:\n");
594 __kmem_traces_dump_tracker(g, g->vmallocs, s);
595 seq_puts(s, "\n");
596 unlock_tracker(g->vmallocs);
597
598 lock_tracker(g->kmallocs);
599	seq_puts(s, "Outstanding kmallocs:\n");
600 __kmem_traces_dump_tracker(g, g->kmallocs, s);
601 unlock_tracker(g->kmallocs);
602
603 return 0;
604}
605
606static int __kmem_traces_open(struct inode *inode, struct file *file)
607{
608 return single_open(file, __kmem_traces_show, inode->i_private);
609}
610
611static const struct file_operations __kmem_traces_fops = {
612 .open = __kmem_traces_open,
613 .read = seq_read,
614 .llseek = seq_lseek,
615 .release = single_release,
616};
617
618void nvgpu_kmem_debugfs_init(struct device *dev)
619{
620 struct gk20a_platform *plat = dev_get_drvdata(dev);
621 struct gk20a *g = get_gk20a(dev);
622 struct dentry *gpu_root = plat->debugfs;
623 struct dentry *node;
624
625 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
626 if (IS_ERR_OR_NULL(g->debugfs_kmem))
627 return;
628
629 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
630 g->debugfs_kmem,
631 g->vmallocs, &__kmem_tracking_fops);
632 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
633 g->debugfs_kmem,
634 g->kmallocs, &__kmem_tracking_fops);
635 node = debugfs_create_file("traces", S_IRUGO,
636 g->debugfs_kmem,
637 g, &__kmem_traces_fops);
638}
639#else
640void nvgpu_kmem_debugfs_init(struct device *dev)
641{
642}
643#endif
644
645static int __do_check_for_outstanding_allocs(
646 struct gk20a *g,
647 struct nvgpu_mem_alloc_tracker *tracker,
648 const char *type, bool silent)
649{
650 struct rb_node *node;
651 int count = 0;
652
653 for (node = rb_first(&tracker->allocs);
654 node != NULL;
655 node = rb_next(node)) {
656 struct nvgpu_mem_alloc *alloc;
657
658 alloc = container_of(node, struct nvgpu_mem_alloc,
659 allocs_entry);
660
661 if (!silent)
662 kmem_print_mem_alloc(g, alloc, NULL);
663
664 count++;
665 }
666
667 return count;
668}
669
670/**
671 * check_for_outstanding_allocs - Count and display outstanding allocs
672 *
673 * @g - The GPU.
674 * @silent - If set don't print anything about the allocs.
675 *
676 * Dump (or just count) the number of allocations left outstanding.
677 */
678static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
679{
680 int count = 0;
681
682 count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
683 silent);
684 count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
685 silent);
686
687 return count;
688}
689
690static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
691 void (*force_free_func)(const void *))
692{
693 struct rb_node *node;
694
695 while ((node = rb_first(&tracker->allocs)) != NULL) {
696 struct nvgpu_mem_alloc *alloc;
697
698 alloc = container_of(node, struct nvgpu_mem_alloc,
699 allocs_entry);
700 if (force_free_func)
701 force_free_func((void *)alloc->addr);
702
703 kfree(alloc);
704 }
705}
706
707/**
708 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
709 *
710 * @g - The GPU.
711 * @force_free - If set will also free leaked objects if possible.
712 *
713 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
714 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
715 * though, as it is possible that the memory is still in use by other parts of
716 * the GPU driver not aware that this has happened.
717 *
718 * In theory it should be fine if the GPU driver has been deinitialized and
719 * there are no bugs in that code. However, if there are any bugs in that code
720 * then they could likely manifest as odd crashes an indeterminate amount of time
721 * in the future. So use @force_free at your own risk.
722 */
723static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
724{
725 do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
726 do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
727}
728
729void nvgpu_kmem_fini(struct gk20a *g, int flags)
730{
731 int count;
732 bool silent, force_free;
733
734 if (!flags)
735 return;
736
737 silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
738 force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
739
740 count = check_for_outstanding_allocs(g, silent);
741 nvgpu_kmem_cleanup(g, force_free);
742
743 /*
744 * If we leak objects we can either BUG() out or just WARN(). In general
745	 * it doesn't make sense to BUG() out here since leaking a few objects
746 * won't crash the kernel but it can be helpful for development.
747 *
748 * If neither flag is set then we just silently do nothing.
749 */
750 if (count > 0) {
751 if (flags & NVGPU_KMEM_FINI_WARN) {
752 WARN(1, "Letting %d allocs leak!!\n", count);
753 } else if (flags & NVGPU_KMEM_FINI_BUG) {
754 gk20a_err(g->dev, "Letting %d allocs leak!!\n", count);
755 BUG();
756 }
757 }
758}
759
760int nvgpu_kmem_init(struct gk20a *g)
761{
762 int err;
763
764 g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
765 g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
766
767 if (!g->vmallocs || !g->kmallocs) {
768 err = -ENOMEM;
769 goto fail;
770 }
771
772 g->vmallocs->name = "vmalloc";
773 g->kmallocs->name = "kmalloc";
774
775 g->vmallocs->allocs = RB_ROOT;
776 g->kmallocs->allocs = RB_ROOT;
777
778 mutex_init(&g->vmallocs->lock);
779 mutex_init(&g->kmallocs->lock);
780
781 g->vmallocs->min_alloc = PAGE_SIZE;
782 g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
783
784 /*
785 * This needs to go after all the other initialization since they use
786 * the nvgpu_kzalloc() API.
787 */
788 g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
789 sizeof(struct nvgpu_mem_alloc));
790 g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
791 sizeof(struct nvgpu_mem_alloc));
792
793 if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
794 err = -ENOMEM;
795 if (g->vmallocs->allocs_cache)
796 nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
797 if (g->kmallocs->allocs_cache)
798 nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
799 goto fail;
800 }
801
802 return 0;
803
804fail:
805 if (g->vmallocs)
806 kfree(g->vmallocs);
807 if (g->kmallocs)
808 kfree(g->kmallocs);
809 return err;
810}
811
812#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
813
814int nvgpu_kmem_init(struct gk20a *g)
815{
816 return 0;
817}
818
819void nvgpu_kmem_fini(struct gk20a *g, int flags)
820{
821}
822#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
823
824struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
825{
826	struct nvgpu_kmem_cache *cache =
-		kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL);
827		nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
828
829	if (!cache)
830		return NULL;
@@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
837	cache->cache = kmem_cache_create(cache->name,
838				size, size, 0, NULL);
839	if (!cache->cache) {
-		kfree(cache);
840		nvgpu_kfree(g, cache);
841		return NULL;
842	}
843
@@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
846
847void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
848{
849	struct gk20a *g = cache->g;
850
851	kmem_cache_destroy(cache->cache);
-	kfree(cache);
852	nvgpu_kfree(g, cache);
853}
854
855void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)