aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichel Lespinasse <walken@google.com>2012-10-08 19:31:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:39 -0400
commit85d3a316c714197f94e75c1e5b2d37607d66e5de (patch)
tree3a53a0ed058c1bb9647ea0f4da2d5e2fd97f68cc
parent6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 (diff)
kmemleak: use rbtree instead of prio tree
kmemleak uses a tree where each node represents an allocated memory object in order to quickly find out what object a given address is part of. However, the objects don't overlap, so rbtrees are a better choice than prio tree for this use. They are both faster and have lower memory overhead. Tested by booting a kernel with kmemleak enabled, loading the kmemleak_test module, and looking for the expected messages. Signed-off-by: Michel Lespinasse <walken@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Hillf Danton <dhillf@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Woodhouse <dwmw2@infradead.org> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/kmemleak.c100
1 files changed, 51 insertions, 49 deletions
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 0de83b4541e9..a217cc544060 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -29,7 +29,7 @@
29 * - kmemleak_lock (rwlock): protects the object_list modifications and 29 * - kmemleak_lock (rwlock): protects the object_list modifications and
30 * accesses to the object_tree_root. The object_list is the main list 30 * accesses to the object_tree_root. The object_list is the main list
31 * holding the metadata (struct kmemleak_object) for the allocated memory 31 * holding the metadata (struct kmemleak_object) for the allocated memory
32 * blocks. The object_tree_root is a priority search tree used to look-up 32 * blocks. The object_tree_root is a red black tree used to look-up
33 * metadata based on a pointer to the corresponding memory block. The 33 * metadata based on a pointer to the corresponding memory block. The
34 * kmemleak_object structures are added to the object_list and 34 * kmemleak_object structures are added to the object_list and
35 * object_tree_root in the create_object() function called from the 35 * object_tree_root in the create_object() function called from the
@@ -71,7 +71,7 @@
71#include <linux/delay.h> 71#include <linux/delay.h>
72#include <linux/export.h> 72#include <linux/export.h>
73#include <linux/kthread.h> 73#include <linux/kthread.h>
74#include <linux/prio_tree.h> 74#include <linux/rbtree.h>
75#include <linux/fs.h> 75#include <linux/fs.h>
76#include <linux/debugfs.h> 76#include <linux/debugfs.h>
77#include <linux/seq_file.h> 77#include <linux/seq_file.h>
@@ -132,7 +132,7 @@ struct kmemleak_scan_area {
132 * Structure holding the metadata for each allocated memory block. 132 * Structure holding the metadata for each allocated memory block.
133 * Modifications to such objects should be made while holding the 133 * Modifications to such objects should be made while holding the
134 * object->lock. Insertions or deletions from object_list, gray_list or 134 * object->lock. Insertions or deletions from object_list, gray_list or
135 * tree_node are already protected by the corresponding locks or mutex (see 135 * rb_node are already protected by the corresponding locks or mutex (see
136 * the notes on locking above). These objects are reference-counted 136 * the notes on locking above). These objects are reference-counted
137 * (use_count) and freed using the RCU mechanism. 137 * (use_count) and freed using the RCU mechanism.
138 */ 138 */
@@ -141,7 +141,7 @@ struct kmemleak_object {
141 unsigned long flags; /* object status flags */ 141 unsigned long flags; /* object status flags */
142 struct list_head object_list; 142 struct list_head object_list;
143 struct list_head gray_list; 143 struct list_head gray_list;
144 struct prio_tree_node tree_node; 144 struct rb_node rb_node;
145 struct rcu_head rcu; /* object_list lockless traversal */ 145 struct rcu_head rcu; /* object_list lockless traversal */
146 /* object usage count; object freed when use_count == 0 */ 146 /* object usage count; object freed when use_count == 0 */
147 atomic_t use_count; 147 atomic_t use_count;
@@ -182,9 +182,9 @@ struct kmemleak_object {
182static LIST_HEAD(object_list); 182static LIST_HEAD(object_list);
183/* the list of gray-colored objects (see color_gray comment below) */ 183/* the list of gray-colored objects (see color_gray comment below) */
184static LIST_HEAD(gray_list); 184static LIST_HEAD(gray_list);
185/* prio search tree for object boundaries */ 185/* search tree for object boundaries */
186static struct prio_tree_root object_tree_root; 186static struct rb_root object_tree_root = RB_ROOT;
187/* rw_lock protecting the access to object_list and prio_tree_root */ 187/* rw_lock protecting the access to object_list and object_tree_root */
188static DEFINE_RWLOCK(kmemleak_lock); 188static DEFINE_RWLOCK(kmemleak_lock);
189 189
190/* allocation caches for kmemleak internal data */ 190/* allocation caches for kmemleak internal data */
@@ -380,7 +380,7 @@ static void dump_object_info(struct kmemleak_object *object)
380 trace.entries = object->trace; 380 trace.entries = object->trace;
381 381
382 pr_notice("Object 0x%08lx (size %zu):\n", 382 pr_notice("Object 0x%08lx (size %zu):\n",
383 object->tree_node.start, object->size); 383 object->pointer, object->size);
384 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", 384 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n",
385 object->comm, object->pid, object->jiffies); 385 object->comm, object->pid, object->jiffies);
386 pr_notice(" min_count = %d\n", object->min_count); 386 pr_notice(" min_count = %d\n", object->min_count);
@@ -392,32 +392,32 @@ static void dump_object_info(struct kmemleak_object *object)
392} 392}
393 393
394/* 394/*
395 * Look-up a memory block metadata (kmemleak_object) in the priority search 395 * Look-up a memory block metadata (kmemleak_object) in the object search
396 * tree based on a pointer value. If alias is 0, only values pointing to the 396 * tree based on a pointer value. If alias is 0, only values pointing to the
397 * beginning of the memory block are allowed. The kmemleak_lock must be held 397 * beginning of the memory block are allowed. The kmemleak_lock must be held
398 * when calling this function. 398 * when calling this function.
399 */ 399 */
400static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) 400static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
401{ 401{
402 struct prio_tree_node *node; 402 struct rb_node *rb = object_tree_root.rb_node;
403 struct prio_tree_iter iter; 403
404 struct kmemleak_object *object; 404 while (rb) {
405 405 struct kmemleak_object *object =
406 prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr); 406 rb_entry(rb, struct kmemleak_object, rb_node);
407 node = prio_tree_next(&iter); 407 if (ptr < object->pointer)
408 if (node) { 408 rb = object->rb_node.rb_left;
409 object = prio_tree_entry(node, struct kmemleak_object, 409 else if (object->pointer + object->size <= ptr)
410 tree_node); 410 rb = object->rb_node.rb_right;
411 if (!alias && object->pointer != ptr) { 411 else if (object->pointer == ptr || alias)
412 return object;
413 else {
412 kmemleak_warn("Found object by alias at 0x%08lx\n", 414 kmemleak_warn("Found object by alias at 0x%08lx\n",
413 ptr); 415 ptr);
414 dump_object_info(object); 416 dump_object_info(object);
415 object = NULL; 417 break;
416 } 418 }
417 } else 419 }
418 object = NULL; 420 return NULL;
419
420 return object;
421} 421}
422 422
423/* 423/*
@@ -471,7 +471,7 @@ static void put_object(struct kmemleak_object *object)
471} 471}
472 472
473/* 473/*
474 * Look up an object in the prio search tree and increase its use_count. 474 * Look up an object in the object search tree and increase its use_count.
475 */ 475 */
476static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) 476static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
477{ 477{
@@ -516,8 +516,8 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
516 int min_count, gfp_t gfp) 516 int min_count, gfp_t gfp)
517{ 517{
518 unsigned long flags; 518 unsigned long flags;
519 struct kmemleak_object *object; 519 struct kmemleak_object *object, *parent;
520 struct prio_tree_node *node; 520 struct rb_node **link, *rb_parent;
521 521
522 object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp)); 522 object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
523 if (!object) { 523 if (!object) {
@@ -560,31 +560,34 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
560 /* kernel backtrace */ 560 /* kernel backtrace */
561 object->trace_len = __save_stack_trace(object->trace); 561 object->trace_len = __save_stack_trace(object->trace);
562 562
563 INIT_PRIO_TREE_NODE(&object->tree_node);
564 object->tree_node.start = ptr;
565 object->tree_node.last = ptr + size - 1;
566
567 write_lock_irqsave(&kmemleak_lock, flags); 563 write_lock_irqsave(&kmemleak_lock, flags);
568 564
569 min_addr = min(min_addr, ptr); 565 min_addr = min(min_addr, ptr);
570 max_addr = max(max_addr, ptr + size); 566 max_addr = max(max_addr, ptr + size);
571 node = prio_tree_insert(&object_tree_root, &object->tree_node); 567 link = &object_tree_root.rb_node;
572 /* 568 rb_parent = NULL;
573 * The code calling the kernel does not yet have the pointer to the 569 while (*link) {
574 * memory block to be able to free it. However, we still hold the 570 rb_parent = *link;
575 * kmemleak_lock here in case parts of the kernel started freeing 571 parent = rb_entry(rb_parent, struct kmemleak_object, rb_node);
576 * random memory blocks. 572 if (ptr + size <= parent->pointer)
577 */ 573 link = &parent->rb_node.rb_left;
578 if (node != &object->tree_node) { 574 else if (parent->pointer + parent->size <= ptr)
579 kmemleak_stop("Cannot insert 0x%lx into the object search tree " 575 link = &parent->rb_node.rb_right;
580 "(already existing)\n", ptr); 576 else {
581 object = lookup_object(ptr, 1); 577 kmemleak_stop("Cannot insert 0x%lx into the object "
582 spin_lock(&object->lock); 578 "search tree (overlaps existing)\n",
583 dump_object_info(object); 579 ptr);
584 spin_unlock(&object->lock); 580 kmem_cache_free(object_cache, object);
585 581 object = parent;
586 goto out; 582 spin_lock(&object->lock);
583 dump_object_info(object);
584 spin_unlock(&object->lock);
585 goto out;
586 }
587 } 587 }
588 rb_link_node(&object->rb_node, rb_parent, link);
589 rb_insert_color(&object->rb_node, &object_tree_root);
590
588 list_add_tail_rcu(&object->object_list, &object_list); 591 list_add_tail_rcu(&object->object_list, &object_list);
589out: 592out:
590 write_unlock_irqrestore(&kmemleak_lock, flags); 593 write_unlock_irqrestore(&kmemleak_lock, flags);
@@ -600,7 +603,7 @@ static void __delete_object(struct kmemleak_object *object)
600 unsigned long flags; 603 unsigned long flags;
601 604
602 write_lock_irqsave(&kmemleak_lock, flags); 605 write_lock_irqsave(&kmemleak_lock, flags);
603 prio_tree_remove(&object_tree_root, &object->tree_node); 606 rb_erase(&object->rb_node, &object_tree_root);
604 list_del_rcu(&object->object_list); 607 list_del_rcu(&object->object_list);
605 write_unlock_irqrestore(&kmemleak_lock, flags); 608 write_unlock_irqrestore(&kmemleak_lock, flags);
606 609
@@ -1766,7 +1769,6 @@ void __init kmemleak_init(void)
1766 1769
1767 object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE); 1770 object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
1768 scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE); 1771 scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
1769 INIT_PRIO_TREE_ROOT(&object_tree_root);
1770 1772
1771 if (crt_early_log >= ARRAY_SIZE(early_log)) 1773 if (crt_early_log >= ARRAY_SIZE(early_log))
1772 pr_warning("Early log buffer exceeded (%d), please increase " 1774 pr_warning("Early log buffer exceeded (%d), please increase "