diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/kmem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/kmem.c | 323 |
1 files changed, 11 insertions, 312 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c index d058eba5..41aaa729 100644 --- a/drivers/gpu/nvgpu/common/linux/kmem.c +++ b/drivers/gpu/nvgpu/common/linux/kmem.c | |||
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr) | |||
134 | 134 | ||
135 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE | 135 | #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE |
136 | 136 | ||
137 | static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | 137 | void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) |
138 | { | 138 | { |
139 | nvgpu_mutex_acquire(&tracker->lock); | 139 | nvgpu_mutex_acquire(&tracker->lock); |
140 | } | 140 | } |
141 | 141 | ||
142 | static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) | 142 | void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) |
143 | { | 143 | { |
144 | nvgpu_mutex_release(&tracker->lock); | 144 | nvgpu_mutex_release(&tracker->lock); |
145 | } | 145 | } |
146 | 146 | ||
147 | static void kmem_print_mem_alloc(struct gk20a *g, | 147 | void kmem_print_mem_alloc(struct gk20a *g, |
148 | struct nvgpu_mem_alloc *alloc, | 148 | struct nvgpu_mem_alloc *alloc, |
149 | struct seq_file *s) | 149 | struct seq_file *s) |
150 | { | 150 | { |
151 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES | 151 | #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES |
152 | int i; | 152 | int i; |
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | |||
231 | alloc->stack_length = stack_trace.nr_entries; | 231 | alloc->stack_length = stack_trace.nr_entries; |
232 | #endif | 232 | #endif |
233 | 233 | ||
234 | lock_tracker(tracker); | 234 | nvgpu_lock_tracker(tracker); |
235 | tracker->bytes_alloced += size; | 235 | tracker->bytes_alloced += size; |
236 | tracker->bytes_alloced_real += real_size; | 236 | tracker->bytes_alloced_real += real_size; |
237 | tracker->nr_allocs++; | 237 | tracker->nr_allocs++; |
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | |||
246 | if (ret) { | 246 | if (ret) { |
247 | WARN(1, "Duplicate alloc??? 0x%llx\n", addr); | 247 | WARN(1, "Duplicate alloc??? 0x%llx\n", addr); |
248 | kfree(alloc); | 248 | kfree(alloc); |
249 | unlock_tracker(tracker); | 249 | nvgpu_unlock_tracker(tracker); |
250 | return ret; | 250 | return ret; |
251 | } | 251 | } |
252 | unlock_tracker(tracker); | 252 | nvgpu_unlock_tracker(tracker); |
253 | 253 | ||
254 | return 0; | 254 | return 0; |
255 | } | 255 | } |
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker, | |||
259 | { | 259 | { |
260 | struct nvgpu_mem_alloc *alloc; | 260 | struct nvgpu_mem_alloc *alloc; |
261 | 261 | ||
262 | lock_tracker(tracker); | 262 | nvgpu_lock_tracker(tracker); |
263 | alloc = nvgpu_rem_alloc(tracker, addr); | 263 | alloc = nvgpu_rem_alloc(tracker, addr); |
264 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { | 264 | if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { |
265 | unlock_tracker(tracker); | 265 | nvgpu_unlock_tracker(tracker); |
266 | return -EINVAL; | 266 | return -EINVAL; |
267 | } | 267 | } |
268 | 268 | ||
269 | tracker->nr_frees++; | 269 | tracker->nr_frees++; |
270 | tracker->bytes_freed += alloc->size; | 270 | tracker->bytes_freed += alloc->size; |
271 | tracker->bytes_freed_real += alloc->real_size; | 271 | tracker->bytes_freed_real += alloc->real_size; |
272 | unlock_tracker(tracker); | 272 | nvgpu_unlock_tracker(tracker); |
273 | 273 | ||
274 | return 0; | 274 | return 0; |
275 | } | 275 | } |
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr) | |||
407 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); | 407 | __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); |
408 | } | 408 | } |
409 | 409 | ||
/**
 * __to_human_readable_bytes - Determine suffix for passed size.
 *
 * @bytes           - Number of bytes to generate a suffix for.
 * @hr_bytes [out]  - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * scaled number of bytes is passed back in @hr_bytes. This returns the
 * following ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
 *
 * Values of 1024 PB and above are still reported in PB (e.g. "1024 PB").
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
				      const char **hr_suffix)
{
	static const char *suffixes[] =
		{ "B", "KB", "MB", "GB", "TB", "PB" };
	const u64 last_ind = ARRAY_SIZE(suffixes) - 1;
	u64 suffix_ind = 0;

	/*
	 * Stop scaling once the largest suffix has been reached. Scaling one
	 * step further and then clamping the index would divide the value by
	 * 1024 once too often while still labelling it "PB".
	 */
	while (suffix_ind < last_ind && bytes >= 1024) {
		bytes >>= 10;
		suffix_ind++;
	}

	*hr_bytes = bytes;
	*hr_suffix = suffixes[suffix_ind];
}
451 | |||
452 | /** | ||
453 | * print_hr_bytes - Print human readable bytes | ||
454 | * | ||
455 | * @s - A seq_file to print to. May be NULL. | ||
456 | * @msg - A message to print before the bytes. | ||
457 | * @bytes - Number of bytes. | ||
458 | * | ||
459 | * Print @msg followed by the human readable decomposition of the passed number | ||
460 | * of bytes. | ||
461 | * | ||
462 | * If @s is NULL then this prints will be made to the kernel log. | ||
463 | */ | ||
464 | static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes) | ||
465 | { | ||
466 | u64 hr_bytes; | ||
467 | const char *hr_suffix; | ||
468 | |||
469 | __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix); | ||
470 | __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix); | ||
471 | } | ||
472 | |||
473 | /** | ||
474 | * print_histogram - Build a histogram of the memory usage. | ||
475 | * | ||
476 | * @tracker The tracking to pull data from. | ||
477 | * @s A seq_file to dump info into. | ||
478 | */ | ||
479 | static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker, | ||
480 | struct seq_file *s) | ||
481 | { | ||
482 | int i; | ||
483 | u64 pot_min, pot_max; | ||
484 | u64 nr_buckets; | ||
485 | unsigned int *buckets; | ||
486 | unsigned int total_allocs; | ||
487 | struct nvgpu_rbtree_node *node; | ||
488 | static const char histogram_line[] = | ||
489 | "++++++++++++++++++++++++++++++++++++++++"; | ||
490 | |||
491 | /* | ||
492 | * pot_min is essentially a round down to the nearest power of 2. This | ||
493 | * is the start of the histogram. pot_max is just a round up to the | ||
494 | * nearest power of two. Each histogram bucket is one power of two so | ||
495 | * the histogram buckets are exponential. | ||
496 | */ | ||
497 | pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc); | ||
498 | pot_max = (u64)roundup_pow_of_two(tracker->max_alloc); | ||
499 | |||
500 | nr_buckets = __ffs(pot_max) - __ffs(pot_min); | ||
501 | |||
502 | buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL); | ||
503 | if (!buckets) { | ||
504 | __pstat(s, "OOM: could not allocate bucket storage!?\n"); | ||
505 | return; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * Iterate across all of the allocs and determine what bucket they | ||
510 | * should go in. Round the size down to the nearest power of two to | ||
511 | * find the right bucket. | ||
512 | */ | ||
513 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
514 | while (node) { | ||
515 | int b; | ||
516 | u64 bucket_min; | ||
517 | struct nvgpu_mem_alloc *alloc = | ||
518 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
519 | |||
520 | bucket_min = (u64)rounddown_pow_of_two(alloc->size); | ||
521 | if (bucket_min < tracker->min_alloc) | ||
522 | bucket_min = tracker->min_alloc; | ||
523 | |||
524 | b = __ffs(bucket_min) - __ffs(pot_min); | ||
525 | |||
526 | /* | ||
527 | * Handle the one case were there's an alloc exactly as big as | ||
528 | * the maximum bucket size of the largest bucket. Most of the | ||
529 | * buckets have an inclusive minimum and exclusive maximum. But | ||
530 | * the largest bucket needs to have an _inclusive_ maximum as | ||
531 | * well. | ||
532 | */ | ||
533 | if (b == (int)nr_buckets) | ||
534 | b--; | ||
535 | |||
536 | buckets[b]++; | ||
537 | |||
538 | nvgpu_rbtree_enum_next(&node, node); | ||
539 | } | ||
540 | |||
541 | total_allocs = 0; | ||
542 | for (i = 0; i < (int)nr_buckets; i++) | ||
543 | total_allocs += buckets[i]; | ||
544 | |||
545 | __pstat(s, "Alloc histogram:\n"); | ||
546 | |||
547 | /* | ||
548 | * Actually compute the histogram lines. | ||
549 | */ | ||
550 | for (i = 0; i < (int)nr_buckets; i++) { | ||
551 | char this_line[sizeof(histogram_line) + 1]; | ||
552 | u64 line_length; | ||
553 | u64 hr_bytes; | ||
554 | const char *hr_suffix; | ||
555 | |||
556 | memset(this_line, 0, sizeof(this_line)); | ||
557 | |||
558 | /* | ||
559 | * Compute the normalized line length. Cant use floating point | ||
560 | * so we will just multiply everything by 1000 and use fixed | ||
561 | * point. | ||
562 | */ | ||
563 | line_length = (1000 * buckets[i]) / total_allocs; | ||
564 | line_length *= sizeof(histogram_line); | ||
565 | line_length /= 1000; | ||
566 | |||
567 | memset(this_line, '+', line_length); | ||
568 | |||
569 | __to_human_readable_bytes(1 << (__ffs(pot_min) + i), | ||
570 | &hr_bytes, &hr_suffix); | ||
571 | __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n", | ||
572 | hr_bytes, hr_bytes << 1, | ||
573 | hr_suffix, buckets[i], this_line); | ||
574 | } | ||
575 | } | ||
576 | |||
577 | #ifdef CONFIG_DEBUG_FS | ||
578 | /** | ||
579 | * nvgpu_kmem_print_stats - Print kmem tracking stats. | ||
580 | * | ||
581 | * @tracker The tracking to pull data from. | ||
582 | * @s A seq_file to dump info into. | ||
583 | * | ||
584 | * Print stats from a tracker. If @s is non-null then seq_printf() will be | ||
585 | * used with @s. Otherwise the stats are pr_info()ed. | ||
586 | */ | ||
587 | void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker, | ||
588 | struct seq_file *s) | ||
589 | { | ||
590 | lock_tracker(tracker); | ||
591 | |||
592 | __pstat(s, "Mem tracker: %s\n\n", tracker->name); | ||
593 | |||
594 | __pstat(s, "Basic Stats:\n"); | ||
595 | __pstat(s, " Number of allocs %lld\n", | ||
596 | tracker->nr_allocs); | ||
597 | __pstat(s, " Number of frees %lld\n", | ||
598 | tracker->nr_frees); | ||
599 | print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc); | ||
600 | print_hr_bytes(s, " Largest alloc ", tracker->max_alloc); | ||
601 | print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced); | ||
602 | print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed); | ||
603 | print_hr_bytes(s, " Bytes allocated (real) ", | ||
604 | tracker->bytes_alloced_real); | ||
605 | print_hr_bytes(s, " Bytes freed (real) ", | ||
606 | tracker->bytes_freed_real); | ||
607 | __pstat(s, "\n"); | ||
608 | |||
609 | print_histogram(tracker, s); | ||
610 | |||
611 | unlock_tracker(tracker); | ||
612 | } | ||
613 | |||
614 | static int __kmem_tracking_show(struct seq_file *s, void *unused) | ||
615 | { | ||
616 | struct nvgpu_mem_alloc_tracker *tracker = s->private; | ||
617 | |||
618 | nvgpu_kmem_print_stats(tracker, s); | ||
619 | |||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | static int __kmem_tracking_open(struct inode *inode, struct file *file) | ||
624 | { | ||
625 | return single_open(file, __kmem_tracking_show, inode->i_private); | ||
626 | } | ||
627 | |||
628 | static const struct file_operations __kmem_tracking_fops = { | ||
629 | .open = __kmem_tracking_open, | ||
630 | .read = seq_read, | ||
631 | .llseek = seq_lseek, | ||
632 | .release = single_release, | ||
633 | }; | ||
634 | |||
635 | static int __kmem_traces_dump_tracker(struct gk20a *g, | ||
636 | struct nvgpu_mem_alloc_tracker *tracker, | ||
637 | struct seq_file *s) | ||
638 | { | ||
639 | struct nvgpu_rbtree_node *node; | ||
640 | |||
641 | nvgpu_rbtree_enum_start(0, &node, tracker->allocs); | ||
642 | while (node) { | ||
643 | struct nvgpu_mem_alloc *alloc = | ||
644 | nvgpu_mem_alloc_from_rbtree_node(node); | ||
645 | |||
646 | kmem_print_mem_alloc(g, alloc, s); | ||
647 | |||
648 | nvgpu_rbtree_enum_next(&node, node); | ||
649 | } | ||
650 | |||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | static int __kmem_traces_show(struct seq_file *s, void *unused) | ||
655 | { | ||
656 | struct gk20a *g = s->private; | ||
657 | |||
658 | lock_tracker(g->vmallocs); | ||
659 | seq_puts(s, "Oustanding vmallocs:\n"); | ||
660 | __kmem_traces_dump_tracker(g, g->vmallocs, s); | ||
661 | seq_puts(s, "\n"); | ||
662 | unlock_tracker(g->vmallocs); | ||
663 | |||
664 | lock_tracker(g->kmallocs); | ||
665 | seq_puts(s, "Oustanding kmallocs:\n"); | ||
666 | __kmem_traces_dump_tracker(g, g->kmallocs, s); | ||
667 | unlock_tracker(g->kmallocs); | ||
668 | |||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | static int __kmem_traces_open(struct inode *inode, struct file *file) | ||
673 | { | ||
674 | return single_open(file, __kmem_traces_show, inode->i_private); | ||
675 | } | ||
676 | |||
677 | static const struct file_operations __kmem_traces_fops = { | ||
678 | .open = __kmem_traces_open, | ||
679 | .read = seq_read, | ||
680 | .llseek = seq_lseek, | ||
681 | .release = single_release, | ||
682 | }; | ||
683 | |||
684 | void nvgpu_kmem_debugfs_init(struct device *dev) | ||
685 | { | ||
686 | struct gk20a_platform *plat = dev_get_drvdata(dev); | ||
687 | struct gk20a *g = get_gk20a(dev); | ||
688 | struct dentry *gpu_root = plat->debugfs; | ||
689 | struct dentry *node; | ||
690 | |||
691 | g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root); | ||
692 | if (IS_ERR_OR_NULL(g->debugfs_kmem)) | ||
693 | return; | ||
694 | |||
695 | node = debugfs_create_file(g->vmallocs->name, S_IRUGO, | ||
696 | g->debugfs_kmem, | ||
697 | g->vmallocs, &__kmem_tracking_fops); | ||
698 | node = debugfs_create_file(g->kmallocs->name, S_IRUGO, | ||
699 | g->debugfs_kmem, | ||
700 | g->kmallocs, &__kmem_tracking_fops); | ||
701 | node = debugfs_create_file("traces", S_IRUGO, | ||
702 | g->debugfs_kmem, | ||
703 | g, &__kmem_traces_fops); | ||
704 | } | ||
705 | #else | ||
void nvgpu_kmem_debugfs_init(struct device *dev)
{
	/* Built without CONFIG_DEBUG_FS: there is nothing to create. */
}
709 | #endif | ||
710 | |||
711 | static int __do_check_for_outstanding_allocs( | 410 | static int __do_check_for_outstanding_allocs( |
712 | struct gk20a *g, | 411 | struct gk20a *g, |
713 | struct nvgpu_mem_alloc_tracker *tracker, | 412 | struct nvgpu_mem_alloc_tracker *tracker, |