aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2012-01-09 20:24:30 -0500
committerEric W. Biederman <ebiederm@xmission.com>2012-01-24 19:40:30 -0500
commitac13ac6f4c6c0504d2c927862216f4e422a2c0b5 (patch)
tree87d0d698e79b911428b8f8e2f357a27f74c854f9
parent9e3d47df35abd6430fed04fb40a76c7358b1e815 (diff)
sysctl: Index sysctl directories with rbtrees.
One of the most important jobs of sysctl is to export network stack tunables. Several of those tunables are per network device. In several instances people are running with 1000+ network devices in there network stacks, which makes the simple per directory linked list in sysctl a scaling bottleneck. Replace O(N^2) sysctl insertion and lookup times with O(NlogN) by using an rbtree to index the sysctl directories. Benchmark before: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Benchmark after: make-dummies 0 999 -> 0.074s rmmod dummy -> 0.070s make-dummies 0 9999 -> 3.4s rmmod dummy -> 0.44s Benchmark after (without dev_snmp6): make-dummies 0 9999 -> 0.75s rmmod dummy -> 0.44s make-dummies 0 99999 -> 11s rmmod dummy -> 4.3s At 10,000 dummy devices the bottleneck becomes the time to add and remove the files under /proc/sys/net/dev_snmp6. I have commented out the code that adds and removes files under /proc/sys/net/dev_snmp6 and taken measurments of creating and destroying 100,000 dummies to verify the sysctl continues to scale. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r--fs/proc/proc_sysctl.c224
-rw-r--r--include/linux/sysctl.h10
2 files changed, 142 insertions, 92 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index e971ccccac4a..05c393a5c530 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -33,12 +33,10 @@ static struct ctl_table root_table[] = {
33 { } 33 { }
34}; 34};
35static struct ctl_table_root sysctl_table_root = { 35static struct ctl_table_root sysctl_table_root = {
36 .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list),
37 .default_set.dir.header = { 36 .default_set.dir.header = {
38 {{.count = 1, 37 {{.count = 1,
39 .nreg = 1, 38 .nreg = 1,
40 .ctl_table = root_table, 39 .ctl_table = root_table }},
41 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}},
42 .ctl_table_arg = root_table, 40 .ctl_table_arg = root_table,
43 .root = &sysctl_table_root, 41 .root = &sysctl_table_root,
44 .set = &sysctl_table_root.default_set, 42 .set = &sysctl_table_root.default_set,
@@ -52,7 +50,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
52 struct ctl_table **pentry, struct nsproxy *namespaces); 50 struct ctl_table **pentry, struct nsproxy *namespaces);
53static int insert_links(struct ctl_table_header *head); 51static int insert_links(struct ctl_table_header *head);
54static void put_links(struct ctl_table_header *header); 52static void put_links(struct ctl_table_header *header);
55static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table);
56 53
57static void sysctl_print_dir(struct ctl_dir *dir) 54static void sysctl_print_dir(struct ctl_dir *dir)
58{ 55{
@@ -81,28 +78,83 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead,
81{ 78{
82 struct ctl_table_header *head; 79 struct ctl_table_header *head;
83 struct ctl_table *entry; 80 struct ctl_table *entry;
81 struct rb_node *node = dir->root.rb_node;
84 82
85 list_for_each_entry(head, &dir->list, ctl_entry) { 83 while (node)
86 if (head->unregistering) 84 {
87 continue; 85 struct ctl_node *ctl_node;
88 for (entry = head->ctl_table; entry->procname; entry++) { 86 const char *procname;
89 const char *procname = entry->procname; 87 int cmp;
90 if (namecmp(procname, strlen(procname), name, namelen) == 0) { 88
91 *phead = head; 89 ctl_node = rb_entry(node, struct ctl_node, node);
92 return entry; 90 head = ctl_node->header;
93 } 91 entry = &head->ctl_table[ctl_node - head->node];
92 procname = entry->procname;
93
94 cmp = namecmp(name, namelen, procname, strlen(procname));
95 if (cmp < 0)
96 node = node->rb_left;
97 else if (cmp > 0)
98 node = node->rb_right;
99 else {
100 *phead = head;
101 return entry;
94 } 102 }
95 } 103 }
96 return NULL; 104 return NULL;
97} 105}
98 106
107static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
108{
109 struct rb_node *node = &head->node[entry - head->ctl_table].node;
110 struct rb_node **p = &head->parent->root.rb_node;
111 struct rb_node *parent = NULL;
112 const char *name = entry->procname;
113 int namelen = strlen(name);
114
115 while (*p) {
116 struct ctl_table_header *parent_head;
117 struct ctl_table *parent_entry;
118 struct ctl_node *parent_node;
119 const char *parent_name;
120 int cmp;
121
122 parent = *p;
123 parent_node = rb_entry(parent, struct ctl_node, node);
124 parent_head = parent_node->header;
125 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
126 parent_name = parent_entry->procname;
127
128 cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
129 if (cmp < 0)
130 p = &(*p)->rb_left;
131 else if (cmp > 0)
132 p = &(*p)->rb_right;
133 else {
134 printk(KERN_ERR "sysctl duplicate entry: ");
135 sysctl_print_dir(head->parent);
136 printk(KERN_CONT "/%s\n", entry->procname);
137 return -EEXIST;
138 }
139 }
140
141 rb_link_node(node, parent, p);
142 return 0;
143}
144
145static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
146{
147 struct rb_node *node = &head->node[entry - head->ctl_table].node;
148
149 rb_erase(node, &head->parent->root);
150}
151
99static void init_header(struct ctl_table_header *head, 152static void init_header(struct ctl_table_header *head,
100 struct ctl_table_root *root, struct ctl_table_set *set, 153 struct ctl_table_root *root, struct ctl_table_set *set,
101 struct ctl_table *table) 154 struct ctl_node *node, struct ctl_table *table)
102{ 155{
103 head->ctl_table = table; 156 head->ctl_table = table;
104 head->ctl_table_arg = table; 157 head->ctl_table_arg = table;
105 INIT_LIST_HEAD(&head->ctl_entry);
106 head->used = 0; 158 head->used = 0;
107 head->count = 1; 159 head->count = 1;
108 head->nreg = 1; 160 head->nreg = 1;
@@ -110,28 +162,42 @@ static void init_header(struct ctl_table_header *head,
110 head->root = root; 162 head->root = root;
111 head->set = set; 163 head->set = set;
112 head->parent = NULL; 164 head->parent = NULL;
165 head->node = node;
166 if (node) {
167 struct ctl_table *entry;
168 for (entry = table; entry->procname; entry++, node++) {
169 rb_init_node(&node->node);
170 node->header = head;
171 }
172 }
113} 173}
114 174
115static void erase_header(struct ctl_table_header *head) 175static void erase_header(struct ctl_table_header *head)
116{ 176{
117 list_del_init(&head->ctl_entry); 177 struct ctl_table *entry;
178 for (entry = head->ctl_table; entry->procname; entry++)
179 erase_entry(head, entry);
118} 180}
119 181
120static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) 182static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
121{ 183{
184 struct ctl_table *entry;
122 int err; 185 int err;
123 186
124 err = sysctl_check_dups(dir, header->ctl_table);
125 if (err)
126 return err;
127
128 dir->header.nreg++; 187 dir->header.nreg++;
129 header->parent = dir; 188 header->parent = dir;
130 err = insert_links(header); 189 err = insert_links(header);
131 if (err) 190 if (err)
132 goto fail_links; 191 goto fail_links;
133 list_add_tail(&header->ctl_entry, &header->parent->list); 192 for (entry = header->ctl_table; entry->procname; entry++) {
193 err = insert_entry(header, entry);
194 if (err)
195 goto fail;
196 }
134 return 0; 197 return 0;
198fail:
199 erase_header(header);
200 put_links(header);
135fail_links: 201fail_links:
136 header->parent = NULL; 202 header->parent = NULL;
137 drop_sysctl_table(&dir->header); 203 drop_sysctl_table(&dir->header);
@@ -241,19 +307,14 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
241 return entry; 307 return entry;
242} 308}
243 309
244static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, 310static struct ctl_node *first_usable_entry(struct rb_node *node)
245 struct list_head *tmp)
246{ 311{
247 struct ctl_table_header *head; 312 struct ctl_node *ctl_node;
248
249 for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) {
250 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
251 313
252 if (!head->ctl_table->procname || 314 for (;node; node = rb_next(node)) {
253 !use_table(head)) 315 ctl_node = rb_entry(node, struct ctl_node, node);
254 continue; 316 if (use_table(ctl_node->header))
255 317 return ctl_node;
256 return head;
257 } 318 }
258 return NULL; 319 return NULL;
259} 320}
@@ -261,14 +322,17 @@ static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir,
261static void first_entry(struct ctl_dir *dir, 322static void first_entry(struct ctl_dir *dir,
262 struct ctl_table_header **phead, struct ctl_table **pentry) 323 struct ctl_table_header **phead, struct ctl_table **pentry)
263{ 324{
264 struct ctl_table_header *head; 325 struct ctl_table_header *head = NULL;
265 struct ctl_table *entry = NULL; 326 struct ctl_table *entry = NULL;
327 struct ctl_node *ctl_node;
266 328
267 spin_lock(&sysctl_lock); 329 spin_lock(&sysctl_lock);
268 head = next_usable_entry(dir, &dir->list); 330 ctl_node = first_usable_entry(rb_first(&dir->root));
269 spin_unlock(&sysctl_lock); 331 spin_unlock(&sysctl_lock);
270 if (head) 332 if (ctl_node) {
271 entry = head->ctl_table; 333 head = ctl_node->header;
334 entry = &head->ctl_table[ctl_node - head->node];
335 }
272 *phead = head; 336 *phead = head;
273 *pentry = entry; 337 *pentry = entry;
274} 338}
@@ -277,15 +341,17 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr
277{ 341{
278 struct ctl_table_header *head = *phead; 342 struct ctl_table_header *head = *phead;
279 struct ctl_table *entry = *pentry; 343 struct ctl_table *entry = *pentry;
344 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
280 345
281 entry++; 346 spin_lock(&sysctl_lock);
282 if (!entry->procname) { 347 unuse_table(head);
283 spin_lock(&sysctl_lock); 348
284 unuse_table(head); 349 ctl_node = first_usable_entry(rb_next(&ctl_node->node));
285 head = next_usable_entry(head->parent, &head->ctl_entry); 350 spin_unlock(&sysctl_lock);
286 spin_unlock(&sysctl_lock); 351 head = NULL;
287 if (head) 352 if (ctl_node) {
288 entry = head->ctl_table; 353 head = ctl_node->header;
354 entry = &head->ctl_table[ctl_node - head->node];
289 } 355 }
290 *phead = head; 356 *phead = head;
291 *pentry = entry; 357 *pentry = entry;
@@ -777,21 +843,23 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set,
777{ 843{
778 struct ctl_table *table; 844 struct ctl_table *table;
779 struct ctl_dir *new; 845 struct ctl_dir *new;
846 struct ctl_node *node;
780 char *new_name; 847 char *new_name;
781 848
782 new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + 849 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
783 namelen + 1, GFP_KERNEL); 850 sizeof(struct ctl_table)*2 + namelen + 1,
851 GFP_KERNEL);
784 if (!new) 852 if (!new)
785 return NULL; 853 return NULL;
786 854
787 table = (struct ctl_table *)(new + 1); 855 node = (struct ctl_node *)(new + 1);
856 table = (struct ctl_table *)(node + 1);
788 new_name = (char *)(table + 2); 857 new_name = (char *)(table + 2);
789 memcpy(new_name, name, namelen); 858 memcpy(new_name, name, namelen);
790 new_name[namelen] = '\0'; 859 new_name[namelen] = '\0';
791 INIT_LIST_HEAD(&new->list);
792 table[0].procname = new_name; 860 table[0].procname = new_name;
793 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; 861 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
794 init_header(&new->header, set->dir.header.root, set, table); 862 init_header(&new->header, set->dir.header.root, set, node, table);
795 863
796 return new; 864 return new;
797} 865}
@@ -892,40 +960,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
892 return ret; 960 return ret;
893} 961}
894 962
895static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old,
896 struct ctl_table *table)
897{
898 struct ctl_table *entry, *test;
899 int error = 0;
900
901 for (entry = old; entry->procname; entry++) {
902 for (test = table; test->procname; test++) {
903 if (strcmp(entry->procname, test->procname) == 0) {
904 printk(KERN_ERR "sysctl duplicate entry: ");
905 sysctl_print_dir(dir);
906 printk(KERN_CONT "/%s\n", test->procname);
907 error = -EEXIST;
908 }
909 }
910 }
911 return error;
912}
913
914static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table)
915{
916 struct ctl_table_header *head;
917 int error = 0;
918
919 list_for_each_entry(head, &dir->list, ctl_entry) {
920 if (head->unregistering)
921 continue;
922 if (head->parent != dir)
923 continue;
924 error = sysctl_check_table_dups(dir, head->ctl_table, table);
925 }
926 return error;
927}
928
929static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 963static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
930{ 964{
931 struct va_format vaf; 965 struct va_format vaf;
@@ -977,6 +1011,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
977{ 1011{
978 struct ctl_table *link_table, *entry, *link; 1012 struct ctl_table *link_table, *entry, *link;
979 struct ctl_table_header *links; 1013 struct ctl_table_header *links;
1014 struct ctl_node *node;
980 char *link_name; 1015 char *link_name;
981 int nr_entries, name_bytes; 1016 int nr_entries, name_bytes;
982 1017
@@ -988,6 +1023,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
988 } 1023 }
989 1024
990 links = kzalloc(sizeof(struct ctl_table_header) + 1025 links = kzalloc(sizeof(struct ctl_table_header) +
1026 sizeof(struct ctl_node)*nr_entries +
991 sizeof(struct ctl_table)*(nr_entries + 1) + 1027 sizeof(struct ctl_table)*(nr_entries + 1) +
992 name_bytes, 1028 name_bytes,
993 GFP_KERNEL); 1029 GFP_KERNEL);
@@ -995,7 +1031,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
995 if (!links) 1031 if (!links)
996 return NULL; 1032 return NULL;
997 1033
998 link_table = (struct ctl_table *)(links + 1); 1034 node = (struct ctl_node *)(links + 1);
1035 link_table = (struct ctl_table *)(node + nr_entries);
999 link_name = (char *)&link_table[nr_entries + 1]; 1036 link_name = (char *)&link_table[nr_entries + 1];
1000 1037
1001 for (link = link_table, entry = table; entry->procname; link++, entry++) { 1038 for (link = link_table, entry = table; entry->procname; link++, entry++) {
@@ -1006,7 +1043,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
1006 link->data = link_root; 1043 link->data = link_root;
1007 link_name += len; 1044 link_name += len;
1008 } 1045 }
1009 init_header(links, dir->header.root, dir->header.set, link_table); 1046 init_header(links, dir->header.root, dir->header.set, node, link_table);
1010 links->nreg = nr_entries; 1047 links->nreg = nr_entries;
1011 1048
1012 return links; 1049 return links;
@@ -1132,12 +1169,20 @@ struct ctl_table_header *__register_sysctl_table(
1132 struct ctl_table_header *header; 1169 struct ctl_table_header *header;
1133 const char *name, *nextname; 1170 const char *name, *nextname;
1134 struct ctl_dir *dir; 1171 struct ctl_dir *dir;
1172 struct ctl_table *entry;
1173 struct ctl_node *node;
1174 int nr_entries = 0;
1175
1176 for (entry = table; entry->procname; entry++)
1177 nr_entries++;
1135 1178
1136 header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); 1179 header = kzalloc(sizeof(struct ctl_table_header) +
1180 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1137 if (!header) 1181 if (!header)
1138 return NULL; 1182 return NULL;
1139 1183
1140 init_header(header, root, set, table); 1184 node = (struct ctl_node *)(header + 1);
1185 init_header(header, root, set, node, table);
1141 if (sysctl_check_table(path, table)) 1186 if (sysctl_check_table(path, table))
1142 goto fail; 1187 goto fail;
1143 1188
@@ -1489,13 +1534,12 @@ void setup_sysctl_set(struct ctl_table_set *set,
1489{ 1534{
1490 memset(set, sizeof(*set), 0); 1535 memset(set, sizeof(*set), 0);
1491 set->is_seen = is_seen; 1536 set->is_seen = is_seen;
1492 INIT_LIST_HEAD(&set->dir.list); 1537 init_header(&set->dir.header, root, set, NULL, root_table);
1493 init_header(&set->dir.header, root, set, root_table);
1494} 1538}
1495 1539
1496void retire_sysctl_set(struct ctl_table_set *set) 1540void retire_sysctl_set(struct ctl_table_set *set)
1497{ 1541{
1498 WARN_ON(!list_empty(&set->dir.list)); 1542 WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1499} 1543}
1500 1544
1501int __init proc_sys_init(void) 1545int __init proc_sys_init(void)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 36dec756ef9d..35c50ed36fc9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -932,6 +932,7 @@ enum
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h> 933#include <linux/rcupdate.h>
934#include <linux/wait.h> 934#include <linux/wait.h>
935#include <linux/rbtree.h>
935 936
936/* For the /proc/sys support */ 937/* For the /proc/sys support */
937struct ctl_table; 938struct ctl_table;
@@ -1023,6 +1024,11 @@ struct ctl_table
1023 void *extra2; 1024 void *extra2;
1024}; 1025};
1025 1026
1027struct ctl_node {
1028 struct rb_node node;
1029 struct ctl_table_header *header;
1030};
1031
1026/* struct ctl_table_header is used to maintain dynamic lists of 1032/* struct ctl_table_header is used to maintain dynamic lists of
1027 struct ctl_table trees. */ 1033 struct ctl_table trees. */
1028struct ctl_table_header 1034struct ctl_table_header
@@ -1030,7 +1036,6 @@ struct ctl_table_header
1030 union { 1036 union {
1031 struct { 1037 struct {
1032 struct ctl_table *ctl_table; 1038 struct ctl_table *ctl_table;
1033 struct list_head ctl_entry;
1034 int used; 1039 int used;
1035 int count; 1040 int count;
1036 int nreg; 1041 int nreg;
@@ -1042,12 +1047,13 @@ struct ctl_table_header
1042 struct ctl_table_root *root; 1047 struct ctl_table_root *root;
1043 struct ctl_table_set *set; 1048 struct ctl_table_set *set;
1044 struct ctl_dir *parent; 1049 struct ctl_dir *parent;
1050 struct ctl_node *node;
1045}; 1051};
1046 1052
1047struct ctl_dir { 1053struct ctl_dir {
1048 /* Header must be at the start of ctl_dir */ 1054 /* Header must be at the start of ctl_dir */
1049 struct ctl_table_header header; 1055 struct ctl_table_header header;
1050 struct list_head list; 1056 struct rb_root root;
1051}; 1057};
1052 1058
1053struct ctl_table_set { 1059struct ctl_table_set {