Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/kmem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c | 323
1 file changed, 11 insertions(+), 312 deletions(-)
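The diff below makes the kmem tracker helpers non-static (lock_tracker/unlock_tracker become nvgpu_lock_tracker/nvgpu_unlock_tracker, and kmem_print_mem_alloc loses its static qualifier) and removes the debugfs statistics code from kmem.c. As an illustrative sketch only (not part of this commit), code outside kmem.c, such as a relocated debugfs dump, could walk a tracker with the newly exported helpers roughly as follows; the header path and the dump_tracker() wrapper are assumptions, the helper calls are taken from the removed code below.

/* Illustrative sketch; include paths are assumed, not taken from the diff. */
#include <linux/seq_file.h>
#include <nvgpu/kmem.h>

/*
 * Dump every outstanding allocation of one tracker, mirroring the removed
 * __kmem_traces_show()/__kmem_traces_dump_tracker() pair but using the
 * now-exported nvgpu_lock_tracker()/nvgpu_unlock_tracker() helpers.
 */
static void dump_tracker(struct gk20a *g,
                         struct nvgpu_mem_alloc_tracker *tracker,
                         struct seq_file *s)
{
        struct nvgpu_rbtree_node *node;

        nvgpu_lock_tracker(tracker);
        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
        while (node) {
                kmem_print_mem_alloc(g,
                        nvgpu_mem_alloc_from_rbtree_node(node), s);
                nvgpu_rbtree_enum_next(&node, node);
        }
        nvgpu_unlock_tracker(tracker);
}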
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
 
 #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
 
-static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
         nvgpu_mutex_acquire(&tracker->lock);
 }
 
-static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
         nvgpu_mutex_release(&tracker->lock);
 }
 
-static void kmem_print_mem_alloc(struct gk20a *g,
+void kmem_print_mem_alloc(struct gk20a *g,
                          struct nvgpu_mem_alloc *alloc,
                          struct seq_file *s)
 {
 #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
         int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
         alloc->stack_length = stack_trace.nr_entries;
 #endif
 
-        lock_tracker(tracker);
+        nvgpu_lock_tracker(tracker);
         tracker->bytes_alloced += size;
         tracker->bytes_alloced_real += real_size;
         tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
         if (ret) {
                 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
                 kfree(alloc);
-                unlock_tracker(tracker);
+                nvgpu_unlock_tracker(tracker);
                 return ret;
         }
-        unlock_tracker(tracker);
+        nvgpu_unlock_tracker(tracker);
 
         return 0;
 }
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 {
         struct nvgpu_mem_alloc *alloc;
 
-        lock_tracker(tracker);
+        nvgpu_lock_tracker(tracker);
         alloc = nvgpu_rem_alloc(tracker, addr);
         if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
-                unlock_tracker(tracker);
+                nvgpu_unlock_tracker(tracker);
                 return -EINVAL;
         }
 
         tracker->nr_frees++;
         tracker->bytes_freed += alloc->size;
         tracker->bytes_freed_real += alloc->real_size;
-        unlock_tracker(tracker);
+        nvgpu_unlock_tracker(tracker);
 
         return 0;
 }
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
         __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
 }
 
-/**
- * to_human_readable_bytes - Determine suffix for passed size.
- *
- * @bytes - Number of bytes to generate a suffix for.
- * @hr_bytes [out] - The human readable number of bytes.
- * @hr_suffix [out] - The suffix for the HR number of bytes.
- *
- * Computes a human readable decomposition of the passed number of bytes. The
- * suffix for the bytes is passed back through the @hr_suffix pointer. The right
- * number of bytes is then passed back in @hr_bytes. This returns the following
- * ranges:
- *
- *   0 - 1023 B
- *   1 - 1023 KB
- *   1 - 1023 MB
- *   1 - 1023 GB
- *   1 - 1023 TB
- *   1 -  ... PB
- */
-static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
-                                      const char **hr_suffix)
-{
-        static const char *suffixes[] =
-                { "B", "KB", "MB", "GB", "TB", "PB" };
-
-        u64 suffix_ind = 0;
-
-        while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
-                bytes >>= 10;
-                suffix_ind++;
-        }
-
-        /*
-         * Handle case where bytes > 1023PB.
-         */
-        suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
-                suffix_ind : ARRAY_SIZE(suffixes) - 1;
-
-        *hr_bytes = bytes;
-        *hr_suffix = suffixes[suffix_ind];
-}
-
-/**
- * print_hr_bytes - Print human readable bytes
- *
- * @s - A seq_file to print to. May be NULL.
- * @msg - A message to print before the bytes.
- * @bytes - Number of bytes.
- *
- * Print @msg followed by the human readable decomposition of the passed number
- * of bytes.
- *
- * If @s is NULL then this prints will be made to the kernel log.
- */
-static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
-{
-        u64 hr_bytes;
-        const char *hr_suffix;
-
-        __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
-        __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
-}
-
-/**
- * print_histogram - Build a histogram of the memory usage.
- *
- * @tracker The tracking to pull data from.
- * @s A seq_file to dump info into.
- */
-static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
-                            struct seq_file *s)
-{
-        int i;
-        u64 pot_min, pot_max;
-        u64 nr_buckets;
-        unsigned int *buckets;
-        unsigned int total_allocs;
-        struct nvgpu_rbtree_node *node;
-        static const char histogram_line[] =
-                "++++++++++++++++++++++++++++++++++++++++";
-
-        /*
-         * pot_min is essentially a round down to the nearest power of 2. This
-         * is the start of the histogram. pot_max is just a round up to the
-         * nearest power of two. Each histogram bucket is one power of two so
-         * the histogram buckets are exponential.
-         */
-        pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
-        pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
-
-        nr_buckets = __ffs(pot_max) - __ffs(pot_min);
-
-        buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
-        if (!buckets) {
-                __pstat(s, "OOM: could not allocate bucket storage!?\n");
-                return;
-        }
-
-        /*
-         * Iterate across all of the allocs and determine what bucket they
-         * should go in. Round the size down to the nearest power of two to
-         * find the right bucket.
-         */
-        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-        while (node) {
-                int b;
-                u64 bucket_min;
-                struct nvgpu_mem_alloc *alloc =
-                        nvgpu_mem_alloc_from_rbtree_node(node);
-
-                bucket_min = (u64)rounddown_pow_of_two(alloc->size);
-                if (bucket_min < tracker->min_alloc)
-                        bucket_min = tracker->min_alloc;
-
-                b = __ffs(bucket_min) - __ffs(pot_min);
-
-                /*
-                 * Handle the one case were there's an alloc exactly as big as
-                 * the maximum bucket size of the largest bucket. Most of the
-                 * buckets have an inclusive minimum and exclusive maximum. But
-                 * the largest bucket needs to have an _inclusive_ maximum as
-                 * well.
-                 */
-                if (b == (int)nr_buckets)
-                        b--;
-
-                buckets[b]++;
-
-                nvgpu_rbtree_enum_next(&node, node);
-        }
-
-        total_allocs = 0;
-        for (i = 0; i < (int)nr_buckets; i++)
-                total_allocs += buckets[i];
-
-        __pstat(s, "Alloc histogram:\n");
-
-        /*
-         * Actually compute the histogram lines.
-         */
-        for (i = 0; i < (int)nr_buckets; i++) {
-                char this_line[sizeof(histogram_line) + 1];
-                u64 line_length;
-                u64 hr_bytes;
-                const char *hr_suffix;
-
-                memset(this_line, 0, sizeof(this_line));
-
-                /*
-                 * Compute the normalized line length. Cant use floating point
-                 * so we will just multiply everything by 1000 and use fixed
-                 * point.
-                 */
-                line_length = (1000 * buckets[i]) / total_allocs;
-                line_length *= sizeof(histogram_line);
-                line_length /= 1000;
-
-                memset(this_line, '+', line_length);
-
-                __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
-                                          &hr_bytes, &hr_suffix);
-                __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
-                        hr_bytes, hr_bytes << 1,
-                        hr_suffix, buckets[i], this_line);
-        }
-}
-
-#ifdef CONFIG_DEBUG_FS
-/**
- * nvgpu_kmem_print_stats - Print kmem tracking stats.
- *
- * @tracker The tracking to pull data from.
- * @s A seq_file to dump info into.
- *
- * Print stats from a tracker. If @s is non-null then seq_printf() will be
- * used with @s. Otherwise the stats are pr_info()ed.
- */
-void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
-                            struct seq_file *s)
-{
-        lock_tracker(tracker);
-
-        __pstat(s, "Mem tracker: %s\n\n", tracker->name);
-
-        __pstat(s, "Basic Stats:\n");
-        __pstat(s, " Number of allocs %lld\n",
-                tracker->nr_allocs);
-        __pstat(s, " Number of frees %lld\n",
-                tracker->nr_frees);
-        print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
-        print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
-        print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
-        print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
-        print_hr_bytes(s, " Bytes allocated (real) ",
-                tracker->bytes_alloced_real);
-        print_hr_bytes(s, " Bytes freed (real) ",
-                tracker->bytes_freed_real);
-        __pstat(s, "\n");
-
-        print_histogram(tracker, s);
-
-        unlock_tracker(tracker);
-}
-
-static int __kmem_tracking_show(struct seq_file *s, void *unused)
-{
-        struct nvgpu_mem_alloc_tracker *tracker = s->private;
-
-        nvgpu_kmem_print_stats(tracker, s);
-
-        return 0;
-}
-
-static int __kmem_tracking_open(struct inode *inode, struct file *file)
-{
-        return single_open(file, __kmem_tracking_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_tracking_fops = {
-        .open = __kmem_tracking_open,
-        .read = seq_read,
-        .llseek = seq_lseek,
-        .release = single_release,
-};
-
-static int __kmem_traces_dump_tracker(struct gk20a *g,
-                                      struct nvgpu_mem_alloc_tracker *tracker,
-                                      struct seq_file *s)
-{
-        struct nvgpu_rbtree_node *node;
-
-        nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-        while (node) {
-                struct nvgpu_mem_alloc *alloc =
-                        nvgpu_mem_alloc_from_rbtree_node(node);
-
-                kmem_print_mem_alloc(g, alloc, s);
-
-                nvgpu_rbtree_enum_next(&node, node);
-        }
-
-        return 0;
-}
-
-static int __kmem_traces_show(struct seq_file *s, void *unused)
-{
-        struct gk20a *g = s->private;
-
-        lock_tracker(g->vmallocs);
-        seq_puts(s, "Oustanding vmallocs:\n");
-        __kmem_traces_dump_tracker(g, g->vmallocs, s);
-        seq_puts(s, "\n");
-        unlock_tracker(g->vmallocs);
-
-        lock_tracker(g->kmallocs);
-        seq_puts(s, "Oustanding kmallocs:\n");
-        __kmem_traces_dump_tracker(g, g->kmallocs, s);
-        unlock_tracker(g->kmallocs);
-
-        return 0;
-}
-
-static int __kmem_traces_open(struct inode *inode, struct file *file)
-{
-        return single_open(file, __kmem_traces_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_traces_fops = {
-        .open = __kmem_traces_open,
-        .read = seq_read,
-        .llseek = seq_lseek,
-        .release = single_release,
-};
-
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-        struct gk20a_platform *plat = dev_get_drvdata(dev);
-        struct gk20a *g = get_gk20a(dev);
-        struct dentry *gpu_root = plat->debugfs;
-        struct dentry *node;
-
-        g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
-        if (IS_ERR_OR_NULL(g->debugfs_kmem))
-                return;
-
-        node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
-                                   g->debugfs_kmem,
-                                   g->vmallocs, &__kmem_tracking_fops);
-        node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
-                                   g->debugfs_kmem,
-                                   g->kmallocs, &__kmem_tracking_fops);
-        node = debugfs_create_file("traces", S_IRUGO,
-                                   g->debugfs_kmem,
-                                   g, &__kmem_traces_fops);
-}
-#else
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-}
-#endif
-
 static int __do_check_for_outstanding_allocs(
                 struct gk20a *g,
                 struct nvgpu_mem_alloc_tracker *tracker,