diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2012-01-09 20:24:30 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2012-01-24 19:40:30 -0500 |
commit | ac13ac6f4c6c0504d2c927862216f4e422a2c0b5 (patch) | |
tree | 87d0d698e79b911428b8f8e2f357a27f74c854f9 /fs/proc/proc_sysctl.c | |
parent | 9e3d47df35abd6430fed04fb40a76c7358b1e815 (diff) |
sysctl: Index sysctl directories with rbtrees.
One of the most important jobs of sysctl is to export network stack
tunables. Several of those tunables are per network device. In
several instances people are running with 1000+ network devices in
there network stacks, which makes the simple per directory linked list
in sysctl a scaling bottleneck. Replace O(N^2) sysctl insertion and
lookup times with O(NlogN) by using an rbtree to index the sysctl
directories.
Benchmark before:
make-dummies 0 999 -> 0.32s
rmmod dummy -> 0.12s
make-dummies 0 9999 -> 1m17s
rmmod dummy -> 17s
Benchmark after:
make-dummies 0 999 -> 0.074s
rmmod dummy -> 0.070s
make-dummies 0 9999 -> 3.4s
rmmod dummy -> 0.44s
Benchmark after (without dev_snmp6):
make-dummies 0 9999 -> 0.75s
rmmod dummy -> 0.44s
make-dummies 0 99999 -> 11s
rmmod dummy -> 4.3s
At 10,000 dummy devices the bottleneck becomes the time to add and
remove the files under /proc/sys/net/dev_snmp6. I have commented
out the code that adds and removes files under /proc/sys/net/dev_snmp6
and taken measurments of creating and destroying 100,000 dummies to
verify the sysctl continues to scale.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r-- | fs/proc/proc_sysctl.c | 224 |
1 files changed, 134 insertions, 90 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e971ccccac4a..05c393a5c530 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -33,12 +33,10 @@ static struct ctl_table root_table[] = { | |||
33 | { } | 33 | { } |
34 | }; | 34 | }; |
35 | static struct ctl_table_root sysctl_table_root = { | 35 | static struct ctl_table_root sysctl_table_root = { |
36 | .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), | ||
37 | .default_set.dir.header = { | 36 | .default_set.dir.header = { |
38 | {{.count = 1, | 37 | {{.count = 1, |
39 | .nreg = 1, | 38 | .nreg = 1, |
40 | .ctl_table = root_table, | 39 | .ctl_table = root_table }}, |
41 | .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, | ||
42 | .ctl_table_arg = root_table, | 40 | .ctl_table_arg = root_table, |
43 | .root = &sysctl_table_root, | 41 | .root = &sysctl_table_root, |
44 | .set = &sysctl_table_root.default_set, | 42 | .set = &sysctl_table_root.default_set, |
@@ -52,7 +50,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, | |||
52 | struct ctl_table **pentry, struct nsproxy *namespaces); | 50 | struct ctl_table **pentry, struct nsproxy *namespaces); |
53 | static int insert_links(struct ctl_table_header *head); | 51 | static int insert_links(struct ctl_table_header *head); |
54 | static void put_links(struct ctl_table_header *header); | 52 | static void put_links(struct ctl_table_header *header); |
55 | static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); | ||
56 | 53 | ||
57 | static void sysctl_print_dir(struct ctl_dir *dir) | 54 | static void sysctl_print_dir(struct ctl_dir *dir) |
58 | { | 55 | { |
@@ -81,28 +78,83 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead, | |||
81 | { | 78 | { |
82 | struct ctl_table_header *head; | 79 | struct ctl_table_header *head; |
83 | struct ctl_table *entry; | 80 | struct ctl_table *entry; |
81 | struct rb_node *node = dir->root.rb_node; | ||
84 | 82 | ||
85 | list_for_each_entry(head, &dir->list, ctl_entry) { | 83 | while (node) |
86 | if (head->unregistering) | 84 | { |
87 | continue; | 85 | struct ctl_node *ctl_node; |
88 | for (entry = head->ctl_table; entry->procname; entry++) { | 86 | const char *procname; |
89 | const char *procname = entry->procname; | 87 | int cmp; |
90 | if (namecmp(procname, strlen(procname), name, namelen) == 0) { | 88 | |
91 | *phead = head; | 89 | ctl_node = rb_entry(node, struct ctl_node, node); |
92 | return entry; | 90 | head = ctl_node->header; |
93 | } | 91 | entry = &head->ctl_table[ctl_node - head->node]; |
92 | procname = entry->procname; | ||
93 | |||
94 | cmp = namecmp(name, namelen, procname, strlen(procname)); | ||
95 | if (cmp < 0) | ||
96 | node = node->rb_left; | ||
97 | else if (cmp > 0) | ||
98 | node = node->rb_right; | ||
99 | else { | ||
100 | *phead = head; | ||
101 | return entry; | ||
94 | } | 102 | } |
95 | } | 103 | } |
96 | return NULL; | 104 | return NULL; |
97 | } | 105 | } |
98 | 106 | ||
107 | static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
108 | { | ||
109 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
110 | struct rb_node **p = &head->parent->root.rb_node; | ||
111 | struct rb_node *parent = NULL; | ||
112 | const char *name = entry->procname; | ||
113 | int namelen = strlen(name); | ||
114 | |||
115 | while (*p) { | ||
116 | struct ctl_table_header *parent_head; | ||
117 | struct ctl_table *parent_entry; | ||
118 | struct ctl_node *parent_node; | ||
119 | const char *parent_name; | ||
120 | int cmp; | ||
121 | |||
122 | parent = *p; | ||
123 | parent_node = rb_entry(parent, struct ctl_node, node); | ||
124 | parent_head = parent_node->header; | ||
125 | parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; | ||
126 | parent_name = parent_entry->procname; | ||
127 | |||
128 | cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); | ||
129 | if (cmp < 0) | ||
130 | p = &(*p)->rb_left; | ||
131 | else if (cmp > 0) | ||
132 | p = &(*p)->rb_right; | ||
133 | else { | ||
134 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
135 | sysctl_print_dir(head->parent); | ||
136 | printk(KERN_CONT "/%s\n", entry->procname); | ||
137 | return -EEXIST; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | rb_link_node(node, parent, p); | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
146 | { | ||
147 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
148 | |||
149 | rb_erase(node, &head->parent->root); | ||
150 | } | ||
151 | |||
99 | static void init_header(struct ctl_table_header *head, | 152 | static void init_header(struct ctl_table_header *head, |
100 | struct ctl_table_root *root, struct ctl_table_set *set, | 153 | struct ctl_table_root *root, struct ctl_table_set *set, |
101 | struct ctl_table *table) | 154 | struct ctl_node *node, struct ctl_table *table) |
102 | { | 155 | { |
103 | head->ctl_table = table; | 156 | head->ctl_table = table; |
104 | head->ctl_table_arg = table; | 157 | head->ctl_table_arg = table; |
105 | INIT_LIST_HEAD(&head->ctl_entry); | ||
106 | head->used = 0; | 158 | head->used = 0; |
107 | head->count = 1; | 159 | head->count = 1; |
108 | head->nreg = 1; | 160 | head->nreg = 1; |
@@ -110,28 +162,42 @@ static void init_header(struct ctl_table_header *head, | |||
110 | head->root = root; | 162 | head->root = root; |
111 | head->set = set; | 163 | head->set = set; |
112 | head->parent = NULL; | 164 | head->parent = NULL; |
165 | head->node = node; | ||
166 | if (node) { | ||
167 | struct ctl_table *entry; | ||
168 | for (entry = table; entry->procname; entry++, node++) { | ||
169 | rb_init_node(&node->node); | ||
170 | node->header = head; | ||
171 | } | ||
172 | } | ||
113 | } | 173 | } |
114 | 174 | ||
115 | static void erase_header(struct ctl_table_header *head) | 175 | static void erase_header(struct ctl_table_header *head) |
116 | { | 176 | { |
117 | list_del_init(&head->ctl_entry); | 177 | struct ctl_table *entry; |
178 | for (entry = head->ctl_table; entry->procname; entry++) | ||
179 | erase_entry(head, entry); | ||
118 | } | 180 | } |
119 | 181 | ||
120 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) | 182 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) |
121 | { | 183 | { |
184 | struct ctl_table *entry; | ||
122 | int err; | 185 | int err; |
123 | 186 | ||
124 | err = sysctl_check_dups(dir, header->ctl_table); | ||
125 | if (err) | ||
126 | return err; | ||
127 | |||
128 | dir->header.nreg++; | 187 | dir->header.nreg++; |
129 | header->parent = dir; | 188 | header->parent = dir; |
130 | err = insert_links(header); | 189 | err = insert_links(header); |
131 | if (err) | 190 | if (err) |
132 | goto fail_links; | 191 | goto fail_links; |
133 | list_add_tail(&header->ctl_entry, &header->parent->list); | 192 | for (entry = header->ctl_table; entry->procname; entry++) { |
193 | err = insert_entry(header, entry); | ||
194 | if (err) | ||
195 | goto fail; | ||
196 | } | ||
134 | return 0; | 197 | return 0; |
198 | fail: | ||
199 | erase_header(header); | ||
200 | put_links(header); | ||
135 | fail_links: | 201 | fail_links: |
136 | header->parent = NULL; | 202 | header->parent = NULL; |
137 | drop_sysctl_table(&dir->header); | 203 | drop_sysctl_table(&dir->header); |
@@ -241,19 +307,14 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, | |||
241 | return entry; | 307 | return entry; |
242 | } | 308 | } |
243 | 309 | ||
244 | static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, | 310 | static struct ctl_node *first_usable_entry(struct rb_node *node) |
245 | struct list_head *tmp) | ||
246 | { | 311 | { |
247 | struct ctl_table_header *head; | 312 | struct ctl_node *ctl_node; |
248 | |||
249 | for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { | ||
250 | head = list_entry(tmp, struct ctl_table_header, ctl_entry); | ||
251 | 313 | ||
252 | if (!head->ctl_table->procname || | 314 | for (;node; node = rb_next(node)) { |
253 | !use_table(head)) | 315 | ctl_node = rb_entry(node, struct ctl_node, node); |
254 | continue; | 316 | if (use_table(ctl_node->header)) |
255 | 317 | return ctl_node; | |
256 | return head; | ||
257 | } | 318 | } |
258 | return NULL; | 319 | return NULL; |
259 | } | 320 | } |
@@ -261,14 +322,17 @@ static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, | |||
261 | static void first_entry(struct ctl_dir *dir, | 322 | static void first_entry(struct ctl_dir *dir, |
262 | struct ctl_table_header **phead, struct ctl_table **pentry) | 323 | struct ctl_table_header **phead, struct ctl_table **pentry) |
263 | { | 324 | { |
264 | struct ctl_table_header *head; | 325 | struct ctl_table_header *head = NULL; |
265 | struct ctl_table *entry = NULL; | 326 | struct ctl_table *entry = NULL; |
327 | struct ctl_node *ctl_node; | ||
266 | 328 | ||
267 | spin_lock(&sysctl_lock); | 329 | spin_lock(&sysctl_lock); |
268 | head = next_usable_entry(dir, &dir->list); | 330 | ctl_node = first_usable_entry(rb_first(&dir->root)); |
269 | spin_unlock(&sysctl_lock); | 331 | spin_unlock(&sysctl_lock); |
270 | if (head) | 332 | if (ctl_node) { |
271 | entry = head->ctl_table; | 333 | head = ctl_node->header; |
334 | entry = &head->ctl_table[ctl_node - head->node]; | ||
335 | } | ||
272 | *phead = head; | 336 | *phead = head; |
273 | *pentry = entry; | 337 | *pentry = entry; |
274 | } | 338 | } |
@@ -277,15 +341,17 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr | |||
277 | { | 341 | { |
278 | struct ctl_table_header *head = *phead; | 342 | struct ctl_table_header *head = *phead; |
279 | struct ctl_table *entry = *pentry; | 343 | struct ctl_table *entry = *pentry; |
344 | struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; | ||
280 | 345 | ||
281 | entry++; | 346 | spin_lock(&sysctl_lock); |
282 | if (!entry->procname) { | 347 | unuse_table(head); |
283 | spin_lock(&sysctl_lock); | 348 | |
284 | unuse_table(head); | 349 | ctl_node = first_usable_entry(rb_next(&ctl_node->node)); |
285 | head = next_usable_entry(head->parent, &head->ctl_entry); | 350 | spin_unlock(&sysctl_lock); |
286 | spin_unlock(&sysctl_lock); | 351 | head = NULL; |
287 | if (head) | 352 | if (ctl_node) { |
288 | entry = head->ctl_table; | 353 | head = ctl_node->header; |
354 | entry = &head->ctl_table[ctl_node - head->node]; | ||
289 | } | 355 | } |
290 | *phead = head; | 356 | *phead = head; |
291 | *pentry = entry; | 357 | *pentry = entry; |
@@ -777,21 +843,23 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, | |||
777 | { | 843 | { |
778 | struct ctl_table *table; | 844 | struct ctl_table *table; |
779 | struct ctl_dir *new; | 845 | struct ctl_dir *new; |
846 | struct ctl_node *node; | ||
780 | char *new_name; | 847 | char *new_name; |
781 | 848 | ||
782 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + | 849 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + |
783 | namelen + 1, GFP_KERNEL); | 850 | sizeof(struct ctl_table)*2 + namelen + 1, |
851 | GFP_KERNEL); | ||
784 | if (!new) | 852 | if (!new) |
785 | return NULL; | 853 | return NULL; |
786 | 854 | ||
787 | table = (struct ctl_table *)(new + 1); | 855 | node = (struct ctl_node *)(new + 1); |
856 | table = (struct ctl_table *)(node + 1); | ||
788 | new_name = (char *)(table + 2); | 857 | new_name = (char *)(table + 2); |
789 | memcpy(new_name, name, namelen); | 858 | memcpy(new_name, name, namelen); |
790 | new_name[namelen] = '\0'; | 859 | new_name[namelen] = '\0'; |
791 | INIT_LIST_HEAD(&new->list); | ||
792 | table[0].procname = new_name; | 860 | table[0].procname = new_name; |
793 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; | 861 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; |
794 | init_header(&new->header, set->dir.header.root, set, table); | 862 | init_header(&new->header, set->dir.header.root, set, node, table); |
795 | 863 | ||
796 | return new; | 864 | return new; |
797 | } | 865 | } |
@@ -892,40 +960,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, | |||
892 | return ret; | 960 | return ret; |
893 | } | 961 | } |
894 | 962 | ||
895 | static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, | ||
896 | struct ctl_table *table) | ||
897 | { | ||
898 | struct ctl_table *entry, *test; | ||
899 | int error = 0; | ||
900 | |||
901 | for (entry = old; entry->procname; entry++) { | ||
902 | for (test = table; test->procname; test++) { | ||
903 | if (strcmp(entry->procname, test->procname) == 0) { | ||
904 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
905 | sysctl_print_dir(dir); | ||
906 | printk(KERN_CONT "/%s\n", test->procname); | ||
907 | error = -EEXIST; | ||
908 | } | ||
909 | } | ||
910 | } | ||
911 | return error; | ||
912 | } | ||
913 | |||
914 | static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) | ||
915 | { | ||
916 | struct ctl_table_header *head; | ||
917 | int error = 0; | ||
918 | |||
919 | list_for_each_entry(head, &dir->list, ctl_entry) { | ||
920 | if (head->unregistering) | ||
921 | continue; | ||
922 | if (head->parent != dir) | ||
923 | continue; | ||
924 | error = sysctl_check_table_dups(dir, head->ctl_table, table); | ||
925 | } | ||
926 | return error; | ||
927 | } | ||
928 | |||
929 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) | 963 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) |
930 | { | 964 | { |
931 | struct va_format vaf; | 965 | struct va_format vaf; |
@@ -977,6 +1011,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table | |||
977 | { | 1011 | { |
978 | struct ctl_table *link_table, *entry, *link; | 1012 | struct ctl_table *link_table, *entry, *link; |
979 | struct ctl_table_header *links; | 1013 | struct ctl_table_header *links; |
1014 | struct ctl_node *node; | ||
980 | char *link_name; | 1015 | char *link_name; |
981 | int nr_entries, name_bytes; | 1016 | int nr_entries, name_bytes; |
982 | 1017 | ||
@@ -988,6 +1023,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table | |||
988 | } | 1023 | } |
989 | 1024 | ||
990 | links = kzalloc(sizeof(struct ctl_table_header) + | 1025 | links = kzalloc(sizeof(struct ctl_table_header) + |
1026 | sizeof(struct ctl_node)*nr_entries + | ||
991 | sizeof(struct ctl_table)*(nr_entries + 1) + | 1027 | sizeof(struct ctl_table)*(nr_entries + 1) + |
992 | name_bytes, | 1028 | name_bytes, |
993 | GFP_KERNEL); | 1029 | GFP_KERNEL); |
@@ -995,7 +1031,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table | |||
995 | if (!links) | 1031 | if (!links) |
996 | return NULL; | 1032 | return NULL; |
997 | 1033 | ||
998 | link_table = (struct ctl_table *)(links + 1); | 1034 | node = (struct ctl_node *)(links + 1); |
1035 | link_table = (struct ctl_table *)(node + nr_entries); | ||
999 | link_name = (char *)&link_table[nr_entries + 1]; | 1036 | link_name = (char *)&link_table[nr_entries + 1]; |
1000 | 1037 | ||
1001 | for (link = link_table, entry = table; entry->procname; link++, entry++) { | 1038 | for (link = link_table, entry = table; entry->procname; link++, entry++) { |
@@ -1006,7 +1043,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table | |||
1006 | link->data = link_root; | 1043 | link->data = link_root; |
1007 | link_name += len; | 1044 | link_name += len; |
1008 | } | 1045 | } |
1009 | init_header(links, dir->header.root, dir->header.set, link_table); | 1046 | init_header(links, dir->header.root, dir->header.set, node, link_table); |
1010 | links->nreg = nr_entries; | 1047 | links->nreg = nr_entries; |
1011 | 1048 | ||
1012 | return links; | 1049 | return links; |
@@ -1132,12 +1169,20 @@ struct ctl_table_header *__register_sysctl_table( | |||
1132 | struct ctl_table_header *header; | 1169 | struct ctl_table_header *header; |
1133 | const char *name, *nextname; | 1170 | const char *name, *nextname; |
1134 | struct ctl_dir *dir; | 1171 | struct ctl_dir *dir; |
1172 | struct ctl_table *entry; | ||
1173 | struct ctl_node *node; | ||
1174 | int nr_entries = 0; | ||
1175 | |||
1176 | for (entry = table; entry->procname; entry++) | ||
1177 | nr_entries++; | ||
1135 | 1178 | ||
1136 | header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); | 1179 | header = kzalloc(sizeof(struct ctl_table_header) + |
1180 | sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); | ||
1137 | if (!header) | 1181 | if (!header) |
1138 | return NULL; | 1182 | return NULL; |
1139 | 1183 | ||
1140 | init_header(header, root, set, table); | 1184 | node = (struct ctl_node *)(header + 1); |
1185 | init_header(header, root, set, node, table); | ||
1141 | if (sysctl_check_table(path, table)) | 1186 | if (sysctl_check_table(path, table)) |
1142 | goto fail; | 1187 | goto fail; |
1143 | 1188 | ||
@@ -1489,13 +1534,12 @@ void setup_sysctl_set(struct ctl_table_set *set, | |||
1489 | { | 1534 | { |
1490 | memset(set, sizeof(*set), 0); | 1535 | memset(set, sizeof(*set), 0); |
1491 | set->is_seen = is_seen; | 1536 | set->is_seen = is_seen; |
1492 | INIT_LIST_HEAD(&set->dir.list); | 1537 | init_header(&set->dir.header, root, set, NULL, root_table); |
1493 | init_header(&set->dir.header, root, set, root_table); | ||
1494 | } | 1538 | } |
1495 | 1539 | ||
1496 | void retire_sysctl_set(struct ctl_table_set *set) | 1540 | void retire_sysctl_set(struct ctl_table_set *set) |
1497 | { | 1541 | { |
1498 | WARN_ON(!list_empty(&set->dir.list)); | 1542 | WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); |
1499 | } | 1543 | } |
1500 | 1544 | ||
1501 | int __init proc_sys_init(void) | 1545 | int __init proc_sys_init(void) |