aboutsummaryrefslogtreecommitdiffstats
path: root/security
diff options
context:
space:
mode:
authorEric Paris <eparis@redhat.com>2010-10-25 14:41:18 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-26 14:37:17 -0400
commit8549164143a5431f9d9ea846acaa35a862410d9c (patch)
tree79b0d2aeb2674f221854866cb067947dc47f2203 /security
parentf6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff)
IMA: use rbtree instead of radix tree for inode information cache
The IMA code needs to store the number of tasks which have an open fd granting permission to write a file even when IMA is not in use. It needs this information in order to be enabled at a later point in time without losing its integrity guarantees. At the moment that means we store a little bit of data about every inode in a cache. We use a radix tree keyed on the inode's memory address. Dave Chinner pointed out that a radix tree is a terrible data structure for such a sparse key space. This patch switches to using an rbtree which should be more efficient.

Bug report from Dave:

"I just noticed that slabtop was reporting an awfully high usage of radix tree nodes:

   OBJS  ACTIVE  USE OBJ SIZE  SLABS OBJ/SLAB CACHE SIZE NAME
4200331 2778082  66%    0.55K 144839       29   2317424K radix_tree_node
2321500 2060290  88%    1.00K  72581       32   2322592K xfs_inode
2235648 2069791  92%    0.12K  69864       32    279456K iint_cache

That is, 2.7M radix tree nodes are allocated, and the cache itself is consuming 2.3GB of RAM. I know that the XFS inode caches are indexed by radix tree node, but for 2 million cached inodes that would mean a density of 1 inode per radix tree node, which for a system with 16M inodes in the filesystems is an impossibly low density. The worst I've seen in a production system like kernel.org is about 20-25% density, which would mean about 150-200k radix tree nodes for that many inodes. So it's not the inode cache.

So I looked up what the iint_cache was. It appears to be used for storing per-inode IMA information, and uses a radix tree for indexing. It uses the *address* of the struct inode as the indexing key. That means the key space is extremely sparse - for XFS the struct inode addresses are approximately 1000 bytes apart, which means the closest the radix tree index keys get is ~1000. Which means that there is a single entry per radix tree leaf node, so the radix tree is using roughly 550 bytes for every 120-byte structure being cached.
For the above example, it's probably wasting close to 1GB of RAM...." Reported-by: Dave Chinner <david@fromorbit.com> Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'security')
-rw-r--r--security/integrity/ima/ima.h6
-rw-r--r--security/integrity/ima/ima_iint.c105
2 files changed, 75 insertions, 36 deletions
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 3fbcd1dda0ef..7557791e954d 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -100,6 +100,8 @@ static inline unsigned long ima_hash_key(u8 *digest)
100 100
101/* integrity data associated with an inode */ 101/* integrity data associated with an inode */
102struct ima_iint_cache { 102struct ima_iint_cache {
103 struct rb_node rb_node; /* rooted in ima_iint_tree */
104 struct inode *inode; /* back pointer to inode in question */
103 u64 version; /* track inode changes */ 105 u64 version; /* track inode changes */
104 unsigned long flags; 106 unsigned long flags;
105 u8 digest[IMA_DIGEST_SIZE]; 107 u8 digest[IMA_DIGEST_SIZE];
@@ -108,7 +110,6 @@ struct ima_iint_cache {
108 long writecount; /* measured files writecount */ 110 long writecount; /* measured files writecount */
109 long opencount; /* opens reference count */ 111 long opencount; /* opens reference count */
110 struct kref refcount; /* ima_iint_cache reference count */ 112 struct kref refcount; /* ima_iint_cache reference count */
111 struct rcu_head rcu;
112}; 113};
113 114
114/* LIM API function definitions */ 115/* LIM API function definitions */
@@ -122,13 +123,12 @@ int ima_store_template(struct ima_template_entry *entry, int violation,
122void ima_template_show(struct seq_file *m, void *e, 123void ima_template_show(struct seq_file *m, void *e,
123 enum ima_show_type show); 124 enum ima_show_type show);
124 125
125/* radix tree calls to lookup, insert, delete 126/* rbtree tree calls to lookup, insert, delete
126 * integrity data associated with an inode. 127 * integrity data associated with an inode.
127 */ 128 */
128struct ima_iint_cache *ima_iint_insert(struct inode *inode); 129struct ima_iint_cache *ima_iint_insert(struct inode *inode);
129struct ima_iint_cache *ima_iint_find_get(struct inode *inode); 130struct ima_iint_cache *ima_iint_find_get(struct inode *inode);
130void iint_free(struct kref *kref); 131void iint_free(struct kref *kref);
131void iint_rcu_free(struct rcu_head *rcu);
132 132
133/* IMA policy related functions */ 133/* IMA policy related functions */
134enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK }; 134enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK };
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index afba4aef812f..8395f0f5e9b9 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -12,21 +12,48 @@
12 * File: ima_iint.c 12 * File: ima_iint.c
13 * - implements the IMA hooks: ima_inode_alloc, ima_inode_free 13 * - implements the IMA hooks: ima_inode_alloc, ima_inode_free
14 * - cache integrity information associated with an inode 14 * - cache integrity information associated with an inode
15 * using a radix tree. 15 * using a rbtree tree.
16 */ 16 */
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <linux/radix-tree.h> 20#include <linux/rbtree.h>
21#include "ima.h" 21#include "ima.h"
22 22
23RADIX_TREE(ima_iint_store, GFP_ATOMIC); 23static struct rb_root ima_iint_tree = RB_ROOT;
24DEFINE_SPINLOCK(ima_iint_lock); 24static DEFINE_SPINLOCK(ima_iint_lock);
25static struct kmem_cache *iint_cache __read_mostly; 25static struct kmem_cache *iint_cache __read_mostly;
26 26
27int iint_initialized = 0; 27int iint_initialized = 0;
28 28
29/* ima_iint_find_get - return the iint associated with an inode 29/*
30 * __ima_iint_find - return the iint associated with an inode
31 */
32static struct ima_iint_cache *__ima_iint_find(struct inode *inode)
33{
34 struct ima_iint_cache *iint;
35 struct rb_node *n = ima_iint_tree.rb_node;
36
37 assert_spin_locked(&ima_iint_lock);
38
39 while (n) {
40 iint = rb_entry(n, struct ima_iint_cache, rb_node);
41
42 if (inode < iint->inode)
43 n = n->rb_left;
44 else if (inode > iint->inode)
45 n = n->rb_right;
46 else
47 break;
48 }
49 if (!n)
50 return NULL;
51
52 return iint;
53}
54
55/*
56 * ima_iint_find_get - return the iint associated with an inode
30 * 57 *
31 * ima_iint_find_get gets a reference to the iint. Caller must 58 * ima_iint_find_get gets a reference to the iint. Caller must
32 * remember to put the iint reference. 59 * remember to put the iint reference.
@@ -35,13 +62,12 @@ struct ima_iint_cache *ima_iint_find_get(struct inode *inode)
35{ 62{
36 struct ima_iint_cache *iint; 63 struct ima_iint_cache *iint;
37 64
38 rcu_read_lock(); 65 spin_lock(&ima_iint_lock);
39 iint = radix_tree_lookup(&ima_iint_store, (unsigned long)inode); 66 iint = __ima_iint_find(inode);
40 if (!iint) 67 if (iint)
41 goto out; 68 kref_get(&iint->refcount);
42 kref_get(&iint->refcount); 69 spin_unlock(&ima_iint_lock);
43out: 70
44 rcu_read_unlock();
45 return iint; 71 return iint;
46} 72}
47 73
@@ -51,25 +77,43 @@ out:
51 */ 77 */
52int ima_inode_alloc(struct inode *inode) 78int ima_inode_alloc(struct inode *inode)
53{ 79{
54 struct ima_iint_cache *iint = NULL; 80 struct rb_node **p;
55 int rc = 0; 81 struct rb_node *new_node, *parent = NULL;
82 struct ima_iint_cache *new_iint, *test_iint;
83 int rc;
56 84
57 iint = kmem_cache_alloc(iint_cache, GFP_NOFS); 85 new_iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
58 if (!iint) 86 if (!new_iint)
59 return -ENOMEM; 87 return -ENOMEM;
60 88
61 rc = radix_tree_preload(GFP_NOFS); 89 new_iint->inode = inode;
62 if (rc < 0) 90 new_node = &new_iint->rb_node;
63 goto out;
64 91
65 spin_lock(&ima_iint_lock); 92 spin_lock(&ima_iint_lock);
66 rc = radix_tree_insert(&ima_iint_store, (unsigned long)inode, iint); 93
94 p = &ima_iint_tree.rb_node;
95 while (*p) {
96 parent = *p;
97 test_iint = rb_entry(parent, struct ima_iint_cache, rb_node);
98
99 rc = -EEXIST;
100 if (inode < test_iint->inode)
101 p = &(*p)->rb_left;
102 else if (inode > test_iint->inode)
103 p = &(*p)->rb_right;
104 else
105 goto out_err;
106 }
107
108 rb_link_node(new_node, parent, p);
109 rb_insert_color(new_node, &ima_iint_tree);
110
67 spin_unlock(&ima_iint_lock); 111 spin_unlock(&ima_iint_lock);
68 radix_tree_preload_end();
69out:
70 if (rc < 0)
71 kmem_cache_free(iint_cache, iint);
72 112
113 return 0;
114out_err:
115 spin_unlock(&ima_iint_lock);
116 kref_put(&new_iint->refcount, iint_free);
73 return rc; 117 return rc;
74} 118}
75 119
@@ -99,13 +143,6 @@ void iint_free(struct kref *kref)
99 kmem_cache_free(iint_cache, iint); 143 kmem_cache_free(iint_cache, iint);
100} 144}
101 145
102void iint_rcu_free(struct rcu_head *rcu_head)
103{
104 struct ima_iint_cache *iint = container_of(rcu_head,
105 struct ima_iint_cache, rcu);
106 kref_put(&iint->refcount, iint_free);
107}
108
109/** 146/**
110 * ima_inode_free - called on security_inode_free 147 * ima_inode_free - called on security_inode_free
111 * @inode: pointer to the inode 148 * @inode: pointer to the inode
@@ -117,10 +154,12 @@ void ima_inode_free(struct inode *inode)
117 struct ima_iint_cache *iint; 154 struct ima_iint_cache *iint;
118 155
119 spin_lock(&ima_iint_lock); 156 spin_lock(&ima_iint_lock);
120 iint = radix_tree_delete(&ima_iint_store, (unsigned long)inode); 157 iint = __ima_iint_find(inode);
158 if (iint)
159 rb_erase(&iint->rb_node, &ima_iint_tree);
121 spin_unlock(&ima_iint_lock); 160 spin_unlock(&ima_iint_lock);
122 if (iint) 161 if (iint)
123 call_rcu(&iint->rcu, iint_rcu_free); 162 kref_put(&iint->refcount, iint_free);
124} 163}
125 164
126static void init_once(void *foo) 165static void init_once(void *foo)