diff options
author | Eric Paris <eparis@redhat.com> | 2010-10-25 14:41:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-26 14:37:17 -0400 |
commit | 8549164143a5431f9d9ea846acaa35a862410d9c (patch) | |
tree | 79b0d2aeb2674f221854866cb067947dc47f2203 | |
parent | f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff) |
IMA: use rbtree instead of radix tree for inode information cache
The IMA code needs to store the number of tasks which have an open fd
granting permission to write a file even when IMA is not in use. It
needs this information in order to be enabled at a later point in time
without losing its integrity guarantees.
At the moment that means we store a little bit of data about every inode
in a cache. We use a radix tree keyed on the inode's memory address.
Dave Chinner pointed out that a radix tree is a terrible data structure
for such a sparse key space. This patch switches to using an rbtree
which should be more efficient.
Bug report from Dave:
"I just noticed that slabtop was reporting an awfully high usage of
radix tree nodes:
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
4200331 2778082 66% 0.55K 144839 29 2317424K radix_tree_node
2321500 2060290 88% 1.00K 72581 32 2322592K xfs_inode
2235648 2069791 92% 0.12K 69864 32 279456K iint_cache
That is, 2.7M radix tree nodes are allocated, and the cache itself is
consuming 2.3GB of RAM. I know that the XFS inode caches are indexed
by radix tree node, but for 2 million cached inodes that would mean a
density of 1 inode per radix tree node, which for a system with 16M
inodes in the filesystems is an impossibly low density. The worst I've
seen in a production system like kernel.org is about 20-25% density,
which would mean about 150-200k radix tree nodes for that many inodes.
So it's not the inode cache.
So I looked up what the iint_cache was. It appears to be used for
storing per-inode IMA information, and uses a radix tree for indexing.
It uses the *address* of the struct inode as the indexing key. That
means the key space is extremely sparse - for XFS the struct inode
addresses are approximately 1000 bytes apart, which means the closest
the radix tree index keys get is ~1000. Which means that there is a
single entry per radix tree leaf node, so the radix tree is using
roughly 550 bytes for every 120-byte structure being cached. For the
above example, it's probably wasting close to 1GB of RAM...."
Reported-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | security/integrity/ima/ima.h | 6 | ||||
-rw-r--r-- | security/integrity/ima/ima_iint.c | 105 |
2 files changed, 75 insertions, 36 deletions
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3fbcd1dda0ef..7557791e954d 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h | |||
@@ -100,6 +100,8 @@ static inline unsigned long ima_hash_key(u8 *digest) | |||
100 | 100 | ||
101 | /* integrity data associated with an inode */ | 101 | /* integrity data associated with an inode */ |
102 | struct ima_iint_cache { | 102 | struct ima_iint_cache { |
103 | struct rb_node rb_node; /* rooted in ima_iint_tree */ | ||
104 | struct inode *inode; /* back pointer to inode in question */ | ||
103 | u64 version; /* track inode changes */ | 105 | u64 version; /* track inode changes */ |
104 | unsigned long flags; | 106 | unsigned long flags; |
105 | u8 digest[IMA_DIGEST_SIZE]; | 107 | u8 digest[IMA_DIGEST_SIZE]; |
@@ -108,7 +110,6 @@ struct ima_iint_cache { | |||
108 | long writecount; /* measured files writecount */ | 110 | long writecount; /* measured files writecount */ |
109 | long opencount; /* opens reference count */ | 111 | long opencount; /* opens reference count */ |
110 | struct kref refcount; /* ima_iint_cache reference count */ | 112 | struct kref refcount; /* ima_iint_cache reference count */ |
111 | struct rcu_head rcu; | ||
112 | }; | 113 | }; |
113 | 114 | ||
114 | /* LIM API function definitions */ | 115 | /* LIM API function definitions */ |
@@ -122,13 +123,12 @@ int ima_store_template(struct ima_template_entry *entry, int violation, | |||
122 | void ima_template_show(struct seq_file *m, void *e, | 123 | void ima_template_show(struct seq_file *m, void *e, |
123 | enum ima_show_type show); | 124 | enum ima_show_type show); |
124 | 125 | ||
125 | /* radix tree calls to lookup, insert, delete | 126 | /* rbtree tree calls to lookup, insert, delete |
126 | * integrity data associated with an inode. | 127 | * integrity data associated with an inode. |
127 | */ | 128 | */ |
128 | struct ima_iint_cache *ima_iint_insert(struct inode *inode); | 129 | struct ima_iint_cache *ima_iint_insert(struct inode *inode); |
129 | struct ima_iint_cache *ima_iint_find_get(struct inode *inode); | 130 | struct ima_iint_cache *ima_iint_find_get(struct inode *inode); |
130 | void iint_free(struct kref *kref); | 131 | void iint_free(struct kref *kref); |
131 | void iint_rcu_free(struct rcu_head *rcu); | ||
132 | 132 | ||
133 | /* IMA policy related functions */ | 133 | /* IMA policy related functions */ |
134 | enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK }; | 134 | enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK }; |
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c index afba4aef812f..8395f0f5e9b9 100644 --- a/security/integrity/ima/ima_iint.c +++ b/security/integrity/ima/ima_iint.c | |||
@@ -12,21 +12,48 @@ | |||
12 | * File: ima_iint.c | 12 | * File: ima_iint.c |
13 | * - implements the IMA hooks: ima_inode_alloc, ima_inode_free | 13 | * - implements the IMA hooks: ima_inode_alloc, ima_inode_free |
14 | * - cache integrity information associated with an inode | 14 | * - cache integrity information associated with an inode |
15 | * using a radix tree. | 15 | * using a rbtree tree. |
16 | */ | 16 | */ |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/spinlock.h> | 19 | #include <linux/spinlock.h> |
20 | #include <linux/radix-tree.h> | 20 | #include <linux/rbtree.h> |
21 | #include "ima.h" | 21 | #include "ima.h" |
22 | 22 | ||
23 | RADIX_TREE(ima_iint_store, GFP_ATOMIC); | 23 | static struct rb_root ima_iint_tree = RB_ROOT; |
24 | DEFINE_SPINLOCK(ima_iint_lock); | 24 | static DEFINE_SPINLOCK(ima_iint_lock); |
25 | static struct kmem_cache *iint_cache __read_mostly; | 25 | static struct kmem_cache *iint_cache __read_mostly; |
26 | 26 | ||
27 | int iint_initialized = 0; | 27 | int iint_initialized = 0; |
28 | 28 | ||
29 | /* ima_iint_find_get - return the iint associated with an inode | 29 | /* |
30 | * __ima_iint_find - return the iint associated with an inode | ||
31 | */ | ||
32 | static struct ima_iint_cache *__ima_iint_find(struct inode *inode) | ||
33 | { | ||
34 | struct ima_iint_cache *iint; | ||
35 | struct rb_node *n = ima_iint_tree.rb_node; | ||
36 | |||
37 | assert_spin_locked(&ima_iint_lock); | ||
38 | |||
39 | while (n) { | ||
40 | iint = rb_entry(n, struct ima_iint_cache, rb_node); | ||
41 | |||
42 | if (inode < iint->inode) | ||
43 | n = n->rb_left; | ||
44 | else if (inode > iint->inode) | ||
45 | n = n->rb_right; | ||
46 | else | ||
47 | break; | ||
48 | } | ||
49 | if (!n) | ||
50 | return NULL; | ||
51 | |||
52 | return iint; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * ima_iint_find_get - return the iint associated with an inode | ||
30 | * | 57 | * |
31 | * ima_iint_find_get gets a reference to the iint. Caller must | 58 | * ima_iint_find_get gets a reference to the iint. Caller must |
32 | * remember to put the iint reference. | 59 | * remember to put the iint reference. |
@@ -35,13 +62,12 @@ struct ima_iint_cache *ima_iint_find_get(struct inode *inode) | |||
35 | { | 62 | { |
36 | struct ima_iint_cache *iint; | 63 | struct ima_iint_cache *iint; |
37 | 64 | ||
38 | rcu_read_lock(); | 65 | spin_lock(&ima_iint_lock); |
39 | iint = radix_tree_lookup(&ima_iint_store, (unsigned long)inode); | 66 | iint = __ima_iint_find(inode); |
40 | if (!iint) | 67 | if (iint) |
41 | goto out; | 68 | kref_get(&iint->refcount); |
42 | kref_get(&iint->refcount); | 69 | spin_unlock(&ima_iint_lock); |
43 | out: | 70 | |
44 | rcu_read_unlock(); | ||
45 | return iint; | 71 | return iint; |
46 | } | 72 | } |
47 | 73 | ||
@@ -51,25 +77,43 @@ out: | |||
51 | */ | 77 | */ |
52 | int ima_inode_alloc(struct inode *inode) | 78 | int ima_inode_alloc(struct inode *inode) |
53 | { | 79 | { |
54 | struct ima_iint_cache *iint = NULL; | 80 | struct rb_node **p; |
55 | int rc = 0; | 81 | struct rb_node *new_node, *parent = NULL; |
82 | struct ima_iint_cache *new_iint, *test_iint; | ||
83 | int rc; | ||
56 | 84 | ||
57 | iint = kmem_cache_alloc(iint_cache, GFP_NOFS); | 85 | new_iint = kmem_cache_alloc(iint_cache, GFP_NOFS); |
58 | if (!iint) | 86 | if (!new_iint) |
59 | return -ENOMEM; | 87 | return -ENOMEM; |
60 | 88 | ||
61 | rc = radix_tree_preload(GFP_NOFS); | 89 | new_iint->inode = inode; |
62 | if (rc < 0) | 90 | new_node = &new_iint->rb_node; |
63 | goto out; | ||
64 | 91 | ||
65 | spin_lock(&ima_iint_lock); | 92 | spin_lock(&ima_iint_lock); |
66 | rc = radix_tree_insert(&ima_iint_store, (unsigned long)inode, iint); | 93 | |
94 | p = &ima_iint_tree.rb_node; | ||
95 | while (*p) { | ||
96 | parent = *p; | ||
97 | test_iint = rb_entry(parent, struct ima_iint_cache, rb_node); | ||
98 | |||
99 | rc = -EEXIST; | ||
100 | if (inode < test_iint->inode) | ||
101 | p = &(*p)->rb_left; | ||
102 | else if (inode > test_iint->inode) | ||
103 | p = &(*p)->rb_right; | ||
104 | else | ||
105 | goto out_err; | ||
106 | } | ||
107 | |||
108 | rb_link_node(new_node, parent, p); | ||
109 | rb_insert_color(new_node, &ima_iint_tree); | ||
110 | |||
67 | spin_unlock(&ima_iint_lock); | 111 | spin_unlock(&ima_iint_lock); |
68 | radix_tree_preload_end(); | ||
69 | out: | ||
70 | if (rc < 0) | ||
71 | kmem_cache_free(iint_cache, iint); | ||
72 | 112 | ||
113 | return 0; | ||
114 | out_err: | ||
115 | spin_unlock(&ima_iint_lock); | ||
116 | kref_put(&new_iint->refcount, iint_free); | ||
73 | return rc; | 117 | return rc; |
74 | } | 118 | } |
75 | 119 | ||
@@ -99,13 +143,6 @@ void iint_free(struct kref *kref) | |||
99 | kmem_cache_free(iint_cache, iint); | 143 | kmem_cache_free(iint_cache, iint); |
100 | } | 144 | } |
101 | 145 | ||
102 | void iint_rcu_free(struct rcu_head *rcu_head) | ||
103 | { | ||
104 | struct ima_iint_cache *iint = container_of(rcu_head, | ||
105 | struct ima_iint_cache, rcu); | ||
106 | kref_put(&iint->refcount, iint_free); | ||
107 | } | ||
108 | |||
109 | /** | 146 | /** |
110 | * ima_inode_free - called on security_inode_free | 147 | * ima_inode_free - called on security_inode_free |
111 | * @inode: pointer to the inode | 148 | * @inode: pointer to the inode |
@@ -117,10 +154,12 @@ void ima_inode_free(struct inode *inode) | |||
117 | struct ima_iint_cache *iint; | 154 | struct ima_iint_cache *iint; |
118 | 155 | ||
119 | spin_lock(&ima_iint_lock); | 156 | spin_lock(&ima_iint_lock); |
120 | iint = radix_tree_delete(&ima_iint_store, (unsigned long)inode); | 157 | iint = __ima_iint_find(inode); |
158 | if (iint) | ||
159 | rb_erase(&iint->rb_node, &ima_iint_tree); | ||
121 | spin_unlock(&ima_iint_lock); | 160 | spin_unlock(&ima_iint_lock); |
122 | if (iint) | 161 | if (iint) |
123 | call_rcu(&iint->rcu, iint_rcu_free); | 162 | kref_put(&iint->refcount, iint_free); |
124 | } | 163 | } |
125 | 164 | ||
126 | static void init_once(void *foo) | 165 | static void init_once(void *foo) |