author     Alex Waterman <alexw@nvidia.com>                          2017-01-11 18:00:54 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>      2017-03-03 13:34:48 -0500
commit     707ea45e0f1d7a07885597777496b186dd5fb6f0 (patch)
tree       9b48640703ccdf0108d731e66574370179a44b23 /drivers/gpu/nvgpu/common
parent     3966efc2e58f1802411f44fd00967dde448f278d (diff)
gpu: nvgpu: kmem abstraction and tracking

Implement kmem abstraction and tracking in nvgpu. The abstraction helps
move nvgpu's core code away from being Linux dependent and allows kmem
allocation tracking to be done for Linux and any other OS supported by
nvgpu.

Bug 1799159
Bug 1823380

Change-Id: Ieaae4ca1bbd1d4db4a1546616ab8b9fc53a4079d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1283828
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
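
Review note: with CONFIG_NVGPU_TRACK_MEM_USAGE enabled, callers reach the
__nvgpu_track_*() hooks added below through thin wrappers that record the
call site. A minimal sketch of what such wrappers could look like; the real
definitions live in <nvgpu/kmem.h> and are not part of this hunk, so the
exact names and the use of _THIS_IP_ here are assumptions:

    /*
     * Hedged sketch only; the actual wrappers are defined in <nvgpu/kmem.h>.
     * _THIS_IP_ captures the caller's instruction pointer so the tracker can
     * report the allocation site (printed via %pF in kmem_print_mem_alloc()).
     */
    #define nvgpu_kzalloc(g, size)  __nvgpu_track_kzalloc(g, size, _THIS_IP_)
    #define nvgpu_vmalloc(g, size)  __nvgpu_track_vmalloc(g, size, _THIS_IP_)
    #define nvgpu_kfree(g, addr)    __nvgpu_track_kfree(g, addr)
    #define nvgpu_vfree(g, addr)    __nvgpu_track_vfree(g, addr)
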
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c       806
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem_priv.h   90
2 files changed, 883 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index 24e0ca5d..60e79348 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -15,11 +15,22 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
+#include <linux/rbtree.h>
+#include <linux/debugfs.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/stacktrace.h>
 
 #include <nvgpu/kmem.h>
 
+#include "gk20a/gk20a.h"
+
+#include "kmem_priv.h"
+
 /*
  * Statically declared because this needs to be shared across all nvgpu driver
  * instances. This makes sure that all kmem caches are _definitely_ uniquely
@@ -27,26 +38,793 @@
  */
 static atomic_t kmem_cache_id;
 
-/*
- * Linux specific version of the nvgpu_kmem_cache struct. This type is
- * completely opaque to the rest of the driver.
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+
+static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+{
+        mutex_lock(&tracker->lock);
+}
+
+static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+{
+        mutex_unlock(&tracker->lock);
+}
+
+static void kmem_print_mem_alloc(struct gk20a *g,
+                                 struct nvgpu_mem_alloc *alloc,
+                                 struct seq_file *s)
+{
+#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
+        int i;
+
+        __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
+                alloc->addr, alloc->size);
+        for (i = 0; i < alloc->stack_length; i++)
+                __pstat(s, " %3d [<%p>] %pS\n", i,
+                        (void *)alloc->stack[i],
+                        (void *)alloc->stack[i]);
+        __pstat(s, "\n");
+#else
+        __pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
+                alloc->addr, alloc->size, alloc->ip);
+#endif
+}
+
+static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
+                           struct nvgpu_mem_alloc *alloc)
+{
+        struct rb_node **new = &tracker->allocs.rb_node;
+        struct rb_node *parent = NULL;
+
+        while (*new) {
+                struct nvgpu_mem_alloc *tmp = rb_entry(*new,
+                                                struct nvgpu_mem_alloc,
+                                                allocs_entry);
+
+                parent = *new;
+
+                if (alloc->addr < tmp->addr)
+                        new = &(*new)->rb_left;
+                else if (alloc->addr > tmp->addr)
+                        new = &(*new)->rb_right;
+                else
+                        return -EINVAL;
+        }
+
+        /* Put the new node there */
+        rb_link_node(&alloc->allocs_entry, parent, new);
+        rb_insert_color(&alloc->allocs_entry, &tracker->allocs);
+
+        return 0;
+}
+
+static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
+        struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
+{
+        struct rb_node *node = tracker->allocs.rb_node;
+        struct nvgpu_mem_alloc *alloc;
+
+        while (node) {
+                alloc = container_of(node,
+                                     struct nvgpu_mem_alloc, allocs_entry);
+
+                if (alloc_addr < alloc->addr)
+                        node = node->rb_left;
+                else if (alloc_addr > alloc->addr)
+                        node = node->rb_right;
+                else
+                        break;
+        }
+
+        if (!node)
+                return NULL;
+
+        rb_erase(node, &tracker->allocs);
+
+        return alloc;
+}
+
+static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
+                                   unsigned long size, unsigned long real_size,
+                                   u64 addr, unsigned long ip)
+{
+        int ret;
+        struct nvgpu_mem_alloc *alloc;
+#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
+        struct stack_trace stack_trace;
+#endif
+
+        alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
+        if (!alloc)
+                return -ENOMEM;
+
+        alloc->owner = tracker;
+        alloc->size = size;
+        alloc->real_size = real_size;
+        alloc->addr = addr;
+        alloc->ip = (void *)(uintptr_t)ip;
+
+#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
+        stack_trace.max_entries = MAX_STACK_TRACE;
+        stack_trace.nr_entries = 0;
+        stack_trace.entries = alloc->stack;
+        /*
+         * The '4' here skips the two function calls that happen for all
+         * traced allocs due to nvgpu:
+         *
+         *   __nvgpu_save_kmem_alloc+0x7c/0x128
+         *   __nvgpu_track_kzalloc+0xcc/0xf8
+         *
+         * and the function calls made by the stack trace code itself. If
+         * the trace saving code changes, this will likely have to change
+         * as well.
+         */
+        stack_trace.skip = 4;
+        save_stack_trace(&stack_trace);
+        alloc->stack_length = stack_trace.nr_entries;
+#endif
+
+        lock_tracker(tracker);
+        tracker->bytes_alloced += size;
+        tracker->bytes_alloced_real += real_size;
+        tracker->nr_allocs++;
+
+        /* Keep track of this for building a histogram later on. */
+        if (tracker->max_alloc < size)
+                tracker->max_alloc = size;
+        if (tracker->min_alloc > size)
+                tracker->min_alloc = size;
+
+        ret = nvgpu_add_alloc(tracker, alloc);
+        if (ret) {
+                WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
+                kfree(alloc);
+                unlock_tracker(tracker);
+                return ret;
+        }
+        unlock_tracker(tracker);
+
+        return 0;
+}
+
+static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
+                                   u64 addr)
+{
+        struct nvgpu_mem_alloc *alloc;
+
+        lock_tracker(tracker);
+        alloc = nvgpu_rem_alloc(tracker, addr);
+        if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
+                unlock_tracker(tracker);
+                return -EINVAL;
+        }
+
+        tracker->nr_frees++;
+        tracker->bytes_freed += alloc->size;
+        tracker->bytes_freed_real += alloc->real_size;
+        unlock_tracker(tracker);
+
+        return 0;
+}
+
+static void __nvgpu_check_valloc_size(unsigned long size)
+{
+        WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
+}
+
+static void __nvgpu_check_kalloc_size(size_t size)
+{
+        WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
+}
+
+void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
+                            unsigned long ip)
+{
+        void *alloc = vmalloc(size);
+
+        if (!alloc)
+                return NULL;
+
+        kmem_dbg("vmalloc: size=%-6ld addr=0x%p", size, alloc);
+        __nvgpu_check_valloc_size(size);
+
+        /*
+         * Ignore the return value. If this fails, let's not cause any issues
+         * for the rest of the driver.
+         */
+        __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
+                                (u64)(uintptr_t)alloc, ip);
+
+        return alloc;
+}
+
+void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
+                            unsigned long ip)
+{
+        void *alloc = vzalloc(size);
+
+        if (!alloc)
+                return NULL;
+
+        kmem_dbg("vzalloc: size=%-6ld addr=0x%p", size, alloc);
+        __nvgpu_check_valloc_size(size);
+
+        /*
+         * Ignore the return value. If this fails, let's not cause any issues
+         * for the rest of the driver.
+         */
+        __nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
+                                (u64)(uintptr_t)alloc, ip);
+
+        return alloc;
+}
+
+void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
+{
+        void *alloc = kmalloc(size, GFP_KERNEL);
+
+        if (!alloc)
+                return NULL;
+
+        kmem_dbg("kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
+                 size, alloc, GFP_KERNEL);
+        __nvgpu_check_kalloc_size(size);
+
+        __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
+                                (u64)(uintptr_t)alloc, ip);
+
+        return alloc;
+}
+
+void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
+{
+        void *alloc = kzalloc(size, GFP_KERNEL);
+
+        if (!alloc)
+                return NULL;
+
+        kmem_dbg("kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
+                 size, alloc, GFP_KERNEL);
+        __nvgpu_check_kalloc_size(size);
+
+        __nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
+                                (u64)(uintptr_t)alloc, ip);
+
+        return alloc;
+}
+
+void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
+                            unsigned long ip)
+{
+        void *alloc = kcalloc(n, size, GFP_KERNEL);
+
+        if (!alloc)
+                return NULL;
+
+        kmem_dbg("kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
+                 n * size, alloc, GFP_KERNEL);
+        __nvgpu_check_kalloc_size(n * size);
+
+        __nvgpu_save_kmem_alloc(g->kmallocs, n * size,
+                                roundup_pow_of_two(n * size),
+                                (u64)(uintptr_t)alloc, ip);
+
+        return alloc;
+}
+
+void __nvgpu_track_vfree(struct gk20a *g, void *addr)
+{
+        /*
+         * It is accepted practice to pass NULL pointers into free functions
+         * to simplify callers.
+         */
+        if (!addr)
+                return;
+
+        vfree(addr);
+
+        kmem_dbg("vfree: addr=0x%p", addr);
+
+        __nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
+}
+
+void __nvgpu_track_kfree(struct gk20a *g, void *addr)
+{
+        if (!addr)
+                return;
+
+        kfree(addr);
+
+        kmem_dbg("kfree: addr=0x%p", addr);
+
+        __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
+}
+
+/**
+ * to_human_readable_bytes - Determine suffix for passed size.
+ *
+ * @bytes - Number of bytes to generate a suffix for.
+ * @hr_bytes [out] - The human readable number of bytes.
+ * @hr_suffix [out] - The suffix for the HR number of bytes.
+ *
+ * Computes a human readable decomposition of the passed number of bytes. The
+ * suffix for the bytes is passed back through the @hr_suffix pointer. The
+ * scaled byte count is passed back in @hr_bytes. This returns the following
+ * ranges:
+ *
+ *   0 - 1023 B
+ *   1 - 1023 KB
+ *   1 - 1023 MB
+ *   1 - 1023 GB
+ *   1 - 1023 TB
+ *   1 -  ... PB
+ */
+static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
+                                      const char **hr_suffix)
+{
+        static const char *suffixes[] =
+                { "B", "KB", "MB", "GB", "TB", "PB" };
+
+        u64 suffix_ind = 0;
+
+        while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
+                bytes >>= 10;
+                suffix_ind++;
+        }
+
+        /*
+         * Handle the case where bytes > 1023PB.
+         */
+        suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
+                suffix_ind : ARRAY_SIZE(suffixes) - 1;
+
+        *hr_bytes = bytes;
+        *hr_suffix = suffixes[suffix_ind];
+}
+
+/**
+ * print_hr_bytes - Print human readable bytes
+ *
+ * @s - A seq_file to print to. May be NULL.
+ * @msg - A message to print before the bytes.
+ * @bytes - Number of bytes.
+ *
+ * Print @msg followed by the human readable decomposition of the passed
+ * number of bytes.
+ *
+ * If @s is NULL then the prints will be made to the kernel log.
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+        u64 hr_bytes;
+        const char *hr_suffix;
+
+        __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+        __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker - The tracker to pull data from.
+ * @s - A seq_file to dump info into.
  */
-struct nvgpu_kmem_cache {
-        struct gk20a *g;
-        struct kmem_cache *cache;
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+                            struct seq_file *s)
+{
+        int i;
+        u64 pot_min, pot_max;
+        u64 nr_buckets;
+        unsigned int *buckets;
+        unsigned int total_allocs;
+        struct rb_node *node;
+        static const char histogram_line[] =
+                "++++++++++++++++++++++++++++++++++++++++";
+
+        /*
+         * pot_min is essentially a round down to the nearest power of 2. This
+         * is the start of the histogram. pot_max is just a round up to the
+         * nearest power of two. Each histogram bucket is one power of two so
+         * the histogram buckets are exponential.
+         */
+        pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+        pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+
+        nr_buckets = __ffs(pot_max) - __ffs(pot_min);
+
+        buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+        if (!buckets) {
+                __pstat(s, "OOM: could not allocate bucket storage!?\n");
+                return;
+        }
 
         /*
-         * Memory to hold the kmem_cache unique name. Only necessary on our
-         * k3.10 kernel when not using the SLUB allocator but it's easier to
-         * just carry this on to newer kernels.
+         * Iterate across all of the allocs and determine what bucket they
+         * should go in. Round the size down to the nearest power of two to
+         * find the right bucket.
          */
-        char name[128];
+        for (node = rb_first(&tracker->allocs);
+             node != NULL;
+             node = rb_next(node)) {
+                int b;
+                u64 bucket_min;
+                struct nvgpu_mem_alloc *alloc;
+
+                alloc = container_of(node, struct nvgpu_mem_alloc,
+                                     allocs_entry);
+                bucket_min = (u64)rounddown_pow_of_two(alloc->size);
+                if (bucket_min < tracker->min_alloc)
+                        bucket_min = tracker->min_alloc;
+
+                b = __ffs(bucket_min) - __ffs(pot_min);
+
+                /*
+                 * Handle the one case where there's an alloc exactly as big
+                 * as the maximum bucket size of the largest bucket. Most of
+                 * the buckets have an inclusive minimum and exclusive
+                 * maximum. But the largest bucket needs to have an
+                 * _inclusive_ maximum as well.
+                 */
+                if (b == (int)nr_buckets)
+                        b--;
+
+                buckets[b]++;
+        }
+
+        total_allocs = 0;
+        for (i = 0; i < (int)nr_buckets; i++)
+                total_allocs += buckets[i];
+
+        __pstat(s, "Alloc histogram:\n");
+
+        /*
+         * Actually compute the histogram lines.
+         */
+        for (i = 0; i < (int)nr_buckets; i++) {
+                char this_line[sizeof(histogram_line) + 1];
+                u64 line_length;
+                u64 hr_bytes;
+                const char *hr_suffix;
+
+                memset(this_line, 0, sizeof(this_line));
+
+                /*
+                 * Compute the normalized line length. Can't use floating
+                 * point, so just multiply everything by 1000 and use fixed
+                 * point instead.
+                 */
+                line_length = (1000 * buckets[i]) / total_allocs;
+                line_length *= sizeof(histogram_line);
+                line_length /= 1000;
+
+                memset(this_line, '+', line_length);
+
+                __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
+                                          &hr_bytes, &hr_suffix);
+                __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
+                        hr_bytes, hr_bytes << 1,
+                        hr_suffix, buckets[i], this_line);
+        }
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker - The tracker to pull data from.
+ * @s - A seq_file to dump info into.
+ *
+ * Print stats from a tracker. If @s is non-null then seq_printf() will be
+ * used with @s. Otherwise the stats are pr_info()ed.
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+                            struct seq_file *s)
+{
+        lock_tracker(tracker);
+
+        __pstat(s, "Mem tracker: %s\n\n", tracker->name);
+
+        __pstat(s, "Basic Stats:\n");
+        __pstat(s, "  Number of allocs        %lld\n",
+                tracker->nr_allocs);
+        __pstat(s, "  Number of frees         %lld\n",
+                tracker->nr_frees);
+        print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
+        print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
+        print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
+        print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
+        print_hr_bytes(s, "  Bytes allocated (real)  ",
+                       tracker->bytes_alloced_real);
+        print_hr_bytes(s, "  Bytes freed (real)      ",
+                       tracker->bytes_freed_real);
+        __pstat(s, "\n");
+
+        print_histogram(tracker, s);
+
+        unlock_tracker(tracker);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+        struct nvgpu_mem_alloc_tracker *tracker = s->private;
+
+        nvgpu_kmem_print_stats(tracker, s);
+
+        return 0;
+}
+
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, __kmem_tracking_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_tracking_fops = {
+        .open = __kmem_tracking_open,
+        .read = seq_read,
+        .llseek = seq_lseek,
+        .release = single_release,
+};
+
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+                                      struct nvgpu_mem_alloc_tracker *tracker,
+                                      struct seq_file *s)
+{
+        struct rb_node *node;
+
+        for (node = rb_first(&tracker->allocs);
+             node != NULL;
+             node = rb_next(node)) {
+                struct nvgpu_mem_alloc *alloc;
+
+                alloc = container_of(node, struct nvgpu_mem_alloc,
+                                     allocs_entry);
+
+                kmem_print_mem_alloc(g, alloc, s);
+        }
+
+        return 0;
+}
+
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+        struct gk20a *g = s->private;
+
+        lock_tracker(g->vmallocs);
+        seq_puts(s, "Outstanding vmallocs:\n");
+        __kmem_traces_dump_tracker(g, g->vmallocs, s);
+        seq_puts(s, "\n");
+        unlock_tracker(g->vmallocs);
+
+        lock_tracker(g->kmallocs);
+        seq_puts(s, "Outstanding kmallocs:\n");
+        __kmem_traces_dump_tracker(g, g->kmallocs, s);
+        unlock_tracker(g->kmallocs);
+
+        return 0;
+}
+
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, __kmem_traces_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_traces_fops = {
+        .open = __kmem_traces_open,
+        .read = seq_read,
+        .llseek = seq_lseek,
+        .release = single_release,
 };
 
+void nvgpu_kmem_debugfs_init(struct device *dev)
+{
+        struct gk20a_platform *plat = dev_get_drvdata(dev);
+        struct gk20a *g = get_gk20a(dev);
+        struct dentry *gpu_root = plat->debugfs;
+        struct dentry *node;
+
+        g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
+        if (IS_ERR_OR_NULL(g->debugfs_kmem))
+                return;
+
+        node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
+                                   g->debugfs_kmem,
+                                   g->vmallocs, &__kmem_tracking_fops);
+        node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
+                                   g->debugfs_kmem,
+                                   g->kmallocs, &__kmem_tracking_fops);
+        node = debugfs_create_file("traces", S_IRUGO,
+                                   g->debugfs_kmem,
+                                   g, &__kmem_traces_fops);
+}
+#else
+void nvgpu_kmem_debugfs_init(struct device *dev)
+{
+}
+#endif
+
+static int __do_check_for_outstanding_allocs(
+        struct gk20a *g,
+        struct nvgpu_mem_alloc_tracker *tracker,
+        const char *type, bool silent)
+{
+        struct rb_node *node;
+        int count = 0;
+
+        for (node = rb_first(&tracker->allocs);
+             node != NULL;
+             node = rb_next(node)) {
+                struct nvgpu_mem_alloc *alloc;
+
+                alloc = container_of(node, struct nvgpu_mem_alloc,
+                                     allocs_entry);
+
+                if (!silent)
+                        kmem_print_mem_alloc(g, alloc, NULL);
+
+                count++;
+        }
+
+        return count;
+}
+
+/**
+ * check_for_outstanding_allocs - Count and display outstanding allocs
+ *
+ * @g - The GPU.
+ * @silent - If set don't print anything about the allocs.
+ *
+ * Dump (or just count) the number of allocations left outstanding.
+ */
+static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
+{
+        int count = 0;
+
+        count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
+                                                   silent);
+        count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
+                                                   silent);
+
+        return count;
+}
+
+static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
+                                  void (*force_free_func)(const void *))
+{
+        struct rb_node *node;
+
+        while ((node = rb_first(&tracker->allocs)) != NULL) {
+                struct nvgpu_mem_alloc *alloc;
+
+                alloc = container_of(node, struct nvgpu_mem_alloc,
+                                     allocs_entry);
+                if (force_free_func)
+                        force_free_func((void *)alloc->addr);
+
+                kfree(alloc);
+        }
+}
+
+/**
+ * nvgpu_kmem_cleanup - Cleanup the kmem tracking
+ *
+ * @g - The GPU.
+ * @force_free - If set will also free leaked objects if possible.
+ *
+ * Cleanup all of the allocs made by the nvgpu_kmem tracking code. If
+ * @force_free is non-zero then the allocations made by nvgpu are also freed.
+ * This is risky, though, as it is possible that the memory is still in use
+ * by other parts of the GPU driver that are not aware this has happened.
+ *
+ * In theory it should be fine if the GPU driver has been deinitialized and
+ * there are no bugs in that code. However, if there are any bugs in that
+ * code then they could manifest as odd crashes an indeterminate amount of
+ * time in the future. So use @force_free at your own risk.
+ */
+static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
+{
+        do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
+        do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
+}
+
+void nvgpu_kmem_fini(struct gk20a *g, int flags)
+{
+        int count;
+        bool silent, force_free;
+
+        if (!flags)
+                return;
+
+        silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
+        force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
+
+        count = check_for_outstanding_allocs(g, silent);
+        nvgpu_kmem_cleanup(g, force_free);
+
+        /*
+         * If we leak objects we can either BUG() out or just WARN(). In general
+         * it doesn't make sense to BUG() here since leaking a few objects
+         * won't crash the kernel, but it can be helpful for development.
+         *
+         * If neither flag is set then we just silently do nothing.
+         */
+        if (count > 0) {
+                if (flags & NVGPU_KMEM_FINI_WARN) {
+                        WARN(1, "Letting %d allocs leak!!\n", count);
+                } else if (flags & NVGPU_KMEM_FINI_BUG) {
+                        gk20a_err(g->dev, "Letting %d allocs leak!!\n", count);
+                        BUG();
+                }
+        }
+}
+
+int nvgpu_kmem_init(struct gk20a *g)
+{
+        int err;
+
+        g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
+        g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
+
+        if (!g->vmallocs || !g->kmallocs) {
+                err = -ENOMEM;
+                goto fail;
+        }
+
+        g->vmallocs->name = "vmalloc";
+        g->kmallocs->name = "kmalloc";
+
+        g->vmallocs->allocs = RB_ROOT;
+        g->kmallocs->allocs = RB_ROOT;
+
+        mutex_init(&g->vmallocs->lock);
+        mutex_init(&g->kmallocs->lock);
+
+        g->vmallocs->min_alloc = PAGE_SIZE;
+        g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
+
+        /*
+         * This needs to go after all the other initialization since the
+         * cache creation uses the nvgpu_kzalloc() API.
+         */
+        g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
+                sizeof(struct nvgpu_mem_alloc));
+        g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
+                sizeof(struct nvgpu_mem_alloc));
+
+        if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
+                err = -ENOMEM;
+                if (g->vmallocs->allocs_cache)
+                        nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
+                if (g->kmallocs->allocs_cache)
+                        nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
+                goto fail;
+        }
+
+        return 0;
+
+fail:
+        if (g->vmallocs)
+                kfree(g->vmallocs);
+        if (g->kmallocs)
+                kfree(g->kmallocs);
+        return err;
+}
+
+#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
+
+int nvgpu_kmem_init(struct gk20a *g)
+{
+        return 0;
+}
+
+void nvgpu_kmem_fini(struct gk20a *g, int flags)
+{
+}
+#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
+
 struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
 {
         struct nvgpu_kmem_cache *cache =
-                kzalloc(sizeof(struct nvgpu_kmem_cache), GFP_KERNEL);
+                nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
 
         if (!cache)
                 return NULL;
@@ -59,7 +837,7 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
         cache->cache = kmem_cache_create(cache->name,
                                          size, size, 0, NULL);
         if (!cache->cache) {
-                kfree(cache);
+                nvgpu_kfree(g, cache);
                 return NULL;
         }
 
@@ -68,8 +846,10 @@ struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
 
 void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
 {
+        struct gk20a *g = cache->g;
+
         kmem_cache_destroy(cache->cache);
-        kfree(cache);
+        nvgpu_kfree(g, cache);
 }
 
 void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
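
Review note: the bucket index computed in print_histogram() above is just the
difference of the low-set-bit positions of the rounded sizes. A standalone
restatement with a worked example (illustrative only, not part of the patch):

    #include <linux/log2.h>

    /*
     * Same computation as print_histogram(): for pot_min = 8 and a 100 byte
     * alloc, rounddown_pow_of_two(100) = 64 and __ffs(64) - __ffs(8) =
     * 6 - 3 = 3, i.e. the alloc lands in the [64, 128) bucket.
     */
    static int hist_bucket(unsigned long size, unsigned long pot_min)
    {
            unsigned long b = rounddown_pow_of_two(size);

            if (b < pot_min)
                    b = pot_min;
            return __ffs(b) - __ffs(pot_min);
    }
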
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
new file mode 100644
index 00000000..5e38ad5d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __KMEM_PRIV_H__
+#define __KMEM_PRIV_H__
+
+#include <linux/rbtree.h>
+
+#define __pstat(s, fmt, msg...)                         \
+        do {                                            \
+                if (s)                                  \
+                        seq_printf(s, fmt, ##msg);      \
+                else                                    \
+                        pr_info(fmt, ##msg);            \
+        } while (0)
+
+#define MAX_STACK_TRACE 20
+
+/*
+ * Linux specific version of the nvgpu_kmem_cache struct. This type is
+ * completely opaque to the rest of the driver.
+ */
+struct nvgpu_kmem_cache {
+        struct gk20a *g;
+        struct kmem_cache *cache;
+
+        /*
+         * Memory to hold the kmem_cache unique name. Only necessary on our
+         * k3.10 kernel when not using the SLUB allocator but it's easier to
+         * just carry this on to newer kernels.
+         */
+        char name[128];
+};
+
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+
+struct nvgpu_mem_alloc {
+        struct nvgpu_mem_alloc_tracker *owner;
+
+        void *ip;
+#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
+        unsigned long stack[MAX_STACK_TRACE];
+        int stack_length;
+#endif
+
+        u64 addr;
+
+        unsigned long size;
+        unsigned long real_size;
+
+        /* Ugh - linux specific. Will need to be abstracted. */
+        struct rb_node allocs_entry;
+};
+
+/*
+ * Linux specific tracking of vmalloc, kmalloc, etc.
+ */
+struct nvgpu_mem_alloc_tracker {
+        const char *name;
+        struct nvgpu_kmem_cache *allocs_cache;
+        struct rb_root allocs;
+        struct mutex lock;
+
+        u64 bytes_alloced;
+        u64 bytes_freed;
+        u64 bytes_alloced_real;
+        u64 bytes_freed_real;
+        u64 nr_allocs;
+        u64 nr_frees;
+
+        unsigned long min_alloc;
+        unsigned long max_alloc;
+};
+
+#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
+
+#endif /* __KMEM_PRIV_H__ */
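
Review note: a minimal sketch of how the new entry points are expected to be
driven from driver setup and teardown. The probe/remove function names here
are illustrative, not part of this patch; the flags are the ones handled by
nvgpu_kmem_fini() above:

    /* Illustrative wiring of the tracker into init/teardown paths. */
    static int example_probe(struct gk20a *g)
    {
            int err = nvgpu_kmem_init(g); /* set up kmalloc/vmalloc trackers */

            if (err)
                    return err;
            /* ... remaining driver initialization ... */
            return 0;
    }

    static void example_remove(struct gk20a *g)
    {
            /* Dump and WARN() about leaks, but don't force-free or BUG(). */
            nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
                               NVGPU_KMEM_FINI_WARN);
    }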