aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-23 21:08:58 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-23 21:08:58 -0400
commitf1d38e423a697b7aa06e12d3ca4753bcc1aa3531 (patch)
tree1cbfd86070f724d5ffe53146d4c67edf14cccf98
parentdae430c6f6e5d0b98c238c340a41a39e221e8940 (diff)
parent4e474a00d7ff746ed177ddae14fa8b2d4bad7a00 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl
Pull sysctl updates from Eric Biederman:

 - Rewrite of sysctl for speed and clarity.

   Insert/remove/lookup in sysctl are all now O(log N) operations, and are no longer bottlenecks in the process of adding and removing network devices.

   sysctl is now focused on being a filesystem instead of a system call, and the code can all be found in fs/proc/proc_sysctl.c. Hopefully this means the code is now approachable.

   Many thanks are owed to Lucian Grijincu for keeping at this until something was found that was usable.

 - The recent proc_sys_poll oops found by the fuzzer during hibernation is fixed.

* git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl: (36 commits)
  sysctl: protect poll() in entries that may go away
  sysctl: Don't call sysctl_follow_link unless we are a link.
  sysctl: Comments to make the code clearer.
  sysctl: Correct error return from get_subdir
  sysctl: An easier to read version of find_subdir
  sysctl: fix memset parameters in setup_sysctl_set()
  sysctl: remove an unused variable
  sysctl: Add register_sysctl for normal sysctl users
  sysctl: Index sysctl directories with rbtrees.
  sysctl: Make the header lists per directory.
  sysctl: Move sysctl_check_dups into insert_header
  sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy
  sysctl: Replace root_list with links between sysctl_table_sets.
  sysctl: Add sysctl_print_dir and use it in get_subdir
  sysctl: Stop requiring explicit management of sysctl directories
  sysctl: Add a root pointer to ctl_table_set
  sysctl: Rewrite proc_sys_readdir in terms of first_entry and next_entry
  sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry.
  sysctl: Normalize the root_table data structure.
  sysctl: Factor out insert_header and erase_header
  ...
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/proc_sysctl.c1274
-rw-r--r--include/linux/sysctl.h106
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/sysctl.c501
-rw-r--r--kernel/sysctl_check.c160
-rw-r--r--lib/Kconfig.debug8
-rw-r--r--net/sysctl_net.c24
8 files changed, 1280 insertions, 797 deletions
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c44efe19798f..5f79bb8b4c60 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -10,12 +10,15 @@
10 */ 10 */
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13struct ctl_table_header;
13 14
14extern struct proc_dir_entry proc_root; 15extern struct proc_dir_entry proc_root;
15#ifdef CONFIG_PROC_SYSCTL 16#ifdef CONFIG_PROC_SYSCTL
16extern int proc_sys_init(void); 17extern int proc_sys_init(void);
18extern void sysctl_head_put(struct ctl_table_header *head);
17#else 19#else
18static inline void proc_sys_init(void) { } 20static inline void proc_sys_init(void) { }
21static inline void sysctl_head_put(struct ctl_table_header *head) { }
19#endif 22#endif
20#ifdef CONFIG_NET 23#ifdef CONFIG_NET
21extern int proc_net_init(void); 24extern int proc_net_init(void);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67bbf6e4e197..21d836f40292 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -9,6 +9,7 @@
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/namei.h> 10#include <linux/namei.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/module.h>
12#include "internal.h" 13#include "internal.h"
13 14
14static const struct dentry_operations proc_sys_dentry_operations; 15static const struct dentry_operations proc_sys_dentry_operations;
@@ -26,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll)
26 wake_up_interruptible(&poll->wait); 27 wake_up_interruptible(&poll->wait);
27} 28}
28 29
30static struct ctl_table root_table[] = {
31 {
32 .procname = "",
33 .mode = S_IFDIR|S_IRUGO|S_IXUGO,
34 },
35 { }
36};
37static struct ctl_table_root sysctl_table_root = {
38 .default_set.dir.header = {
39 {{.count = 1,
40 .nreg = 1,
41 .ctl_table = root_table }},
42 .ctl_table_arg = root_table,
43 .root = &sysctl_table_root,
44 .set = &sysctl_table_root.default_set,
45 },
46};
47
48static DEFINE_SPINLOCK(sysctl_lock);
49
50static void drop_sysctl_table(struct ctl_table_header *header);
51static int sysctl_follow_link(struct ctl_table_header **phead,
52 struct ctl_table **pentry, struct nsproxy *namespaces);
53static int insert_links(struct ctl_table_header *head);
54static void put_links(struct ctl_table_header *header);
55
56static void sysctl_print_dir(struct ctl_dir *dir)
57{
58 if (dir->header.parent)
59 sysctl_print_dir(dir->header.parent);
60 printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname);
61}
62
63static int namecmp(const char *name1, int len1, const char *name2, int len2)
64{
65 int minlen;
66 int cmp;
67
68 minlen = len1;
69 if (minlen > len2)
70 minlen = len2;
71
72 cmp = memcmp(name1, name2, minlen);
73 if (cmp == 0)
74 cmp = len1 - len2;
75 return cmp;
76}
77
78/* Called under sysctl_lock */
79static struct ctl_table *find_entry(struct ctl_table_header **phead,
80 struct ctl_dir *dir, const char *name, int namelen)
81{
82 struct ctl_table_header *head;
83 struct ctl_table *entry;
84 struct rb_node *node = dir->root.rb_node;
85
86 while (node)
87 {
88 struct ctl_node *ctl_node;
89 const char *procname;
90 int cmp;
91
92 ctl_node = rb_entry(node, struct ctl_node, node);
93 head = ctl_node->header;
94 entry = &head->ctl_table[ctl_node - head->node];
95 procname = entry->procname;
96
97 cmp = namecmp(name, namelen, procname, strlen(procname));
98 if (cmp < 0)
99 node = node->rb_left;
100 else if (cmp > 0)
101 node = node->rb_right;
102 else {
103 *phead = head;
104 return entry;
105 }
106 }
107 return NULL;
108}
109
110static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
111{
112 struct rb_node *node = &head->node[entry - head->ctl_table].node;
113 struct rb_node **p = &head->parent->root.rb_node;
114 struct rb_node *parent = NULL;
115 const char *name = entry->procname;
116 int namelen = strlen(name);
117
118 while (*p) {
119 struct ctl_table_header *parent_head;
120 struct ctl_table *parent_entry;
121 struct ctl_node *parent_node;
122 const char *parent_name;
123 int cmp;
124
125 parent = *p;
126 parent_node = rb_entry(parent, struct ctl_node, node);
127 parent_head = parent_node->header;
128 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
129 parent_name = parent_entry->procname;
130
131 cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
132 if (cmp < 0)
133 p = &(*p)->rb_left;
134 else if (cmp > 0)
135 p = &(*p)->rb_right;
136 else {
137 printk(KERN_ERR "sysctl duplicate entry: ");
138 sysctl_print_dir(head->parent);
139 printk(KERN_CONT "/%s\n", entry->procname);
140 return -EEXIST;
141 }
142 }
143
144 rb_link_node(node, parent, p);
145 return 0;
146}
147
148static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
149{
150 struct rb_node *node = &head->node[entry - head->ctl_table].node;
151
152 rb_erase(node, &head->parent->root);
153}
154
155static void init_header(struct ctl_table_header *head,
156 struct ctl_table_root *root, struct ctl_table_set *set,
157 struct ctl_node *node, struct ctl_table *table)
158{
159 head->ctl_table = table;
160 head->ctl_table_arg = table;
161 head->used = 0;
162 head->count = 1;
163 head->nreg = 1;
164 head->unregistering = NULL;
165 head->root = root;
166 head->set = set;
167 head->parent = NULL;
168 head->node = node;
169 if (node) {
170 struct ctl_table *entry;
171 for (entry = table; entry->procname; entry++, node++) {
172 rb_init_node(&node->node);
173 node->header = head;
174 }
175 }
176}
177
178static void erase_header(struct ctl_table_header *head)
179{
180 struct ctl_table *entry;
181 for (entry = head->ctl_table; entry->procname; entry++)
182 erase_entry(head, entry);
183}
184
185static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
186{
187 struct ctl_table *entry;
188 int err;
189
190 dir->header.nreg++;
191 header->parent = dir;
192 err = insert_links(header);
193 if (err)
194 goto fail_links;
195 for (entry = header->ctl_table; entry->procname; entry++) {
196 err = insert_entry(header, entry);
197 if (err)
198 goto fail;
199 }
200 return 0;
201fail:
202 erase_header(header);
203 put_links(header);
204fail_links:
205 header->parent = NULL;
206 drop_sysctl_table(&dir->header);
207 return err;
208}
209
210/* called under sysctl_lock */
211static int use_table(struct ctl_table_header *p)
212{
213 if (unlikely(p->unregistering))
214 return 0;
215 p->used++;
216 return 1;
217}
218
219/* called under sysctl_lock */
220static void unuse_table(struct ctl_table_header *p)
221{
222 if (!--p->used)
223 if (unlikely(p->unregistering))
224 complete(p->unregistering);
225}
226
227/* called under sysctl_lock, will reacquire if has to wait */
228static void start_unregistering(struct ctl_table_header *p)
229{
230 /*
231 * if p->used is 0, nobody will ever touch that entry again;
232 * we'll eliminate all paths to it before dropping sysctl_lock
233 */
234 if (unlikely(p->used)) {
235 struct completion wait;
236 init_completion(&wait);
237 p->unregistering = &wait;
238 spin_unlock(&sysctl_lock);
239 wait_for_completion(&wait);
240 spin_lock(&sysctl_lock);
241 } else {
242 /* anything non-NULL; we'll never dereference it */
243 p->unregistering = ERR_PTR(-EINVAL);
244 }
245 /*
246 * do not remove from the list until nobody holds it; walking the
247 * list in do_sysctl() relies on that.
248 */
249 erase_header(p);
250}
251
252static void sysctl_head_get(struct ctl_table_header *head)
253{
254 spin_lock(&sysctl_lock);
255 head->count++;
256 spin_unlock(&sysctl_lock);
257}
258
259void sysctl_head_put(struct ctl_table_header *head)
260{
261 spin_lock(&sysctl_lock);
262 if (!--head->count)
263 kfree_rcu(head, rcu);
264 spin_unlock(&sysctl_lock);
265}
266
267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
268{
269 if (!head)
270 BUG();
271 spin_lock(&sysctl_lock);
272 if (!use_table(head))
273 head = ERR_PTR(-ENOENT);
274 spin_unlock(&sysctl_lock);
275 return head;
276}
277
278static void sysctl_head_finish(struct ctl_table_header *head)
279{
280 if (!head)
281 return;
282 spin_lock(&sysctl_lock);
283 unuse_table(head);
284 spin_unlock(&sysctl_lock);
285}
286
287static struct ctl_table_set *
288lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
289{
290 struct ctl_table_set *set = &root->default_set;
291 if (root->lookup)
292 set = root->lookup(root, namespaces);
293 return set;
294}
295
296static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
297 struct ctl_dir *dir,
298 const char *name, int namelen)
299{
300 struct ctl_table_header *head;
301 struct ctl_table *entry;
302
303 spin_lock(&sysctl_lock);
304 entry = find_entry(&head, dir, name, namelen);
305 if (entry && use_table(head))
306 *phead = head;
307 else
308 entry = NULL;
309 spin_unlock(&sysctl_lock);
310 return entry;
311}
312
313static struct ctl_node *first_usable_entry(struct rb_node *node)
314{
315 struct ctl_node *ctl_node;
316
317 for (;node; node = rb_next(node)) {
318 ctl_node = rb_entry(node, struct ctl_node, node);
319 if (use_table(ctl_node->header))
320 return ctl_node;
321 }
322 return NULL;
323}
324
325static void first_entry(struct ctl_dir *dir,
326 struct ctl_table_header **phead, struct ctl_table **pentry)
327{
328 struct ctl_table_header *head = NULL;
329 struct ctl_table *entry = NULL;
330 struct ctl_node *ctl_node;
331
332 spin_lock(&sysctl_lock);
333 ctl_node = first_usable_entry(rb_first(&dir->root));
334 spin_unlock(&sysctl_lock);
335 if (ctl_node) {
336 head = ctl_node->header;
337 entry = &head->ctl_table[ctl_node - head->node];
338 }
339 *phead = head;
340 *pentry = entry;
341}
342
343static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
344{
345 struct ctl_table_header *head = *phead;
346 struct ctl_table *entry = *pentry;
347 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
348
349 spin_lock(&sysctl_lock);
350 unuse_table(head);
351
352 ctl_node = first_usable_entry(rb_next(&ctl_node->node));
353 spin_unlock(&sysctl_lock);
354 head = NULL;
355 if (ctl_node) {
356 head = ctl_node->header;
357 entry = &head->ctl_table[ctl_node - head->node];
358 }
359 *phead = head;
360 *pentry = entry;
361}
362
363void register_sysctl_root(struct ctl_table_root *root)
364{
365}
366
367/*
368 * sysctl_perm does NOT grant the superuser all rights automatically, because
369 * some sysctl variables are readonly even to root.
370 */
371
372static int test_perm(int mode, int op)
373{
374 if (!current_euid())
375 mode >>= 6;
376 else if (in_egroup_p(0))
377 mode >>= 3;
378 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
379 return 0;
380 return -EACCES;
381}
382
383static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
384{
385 int mode;
386
387 if (root->permissions)
388 mode = root->permissions(root, current->nsproxy, table);
389 else
390 mode = table->mode;
391
392 return test_perm(mode, op);
393}
394
29static struct inode *proc_sys_make_inode(struct super_block *sb, 395static struct inode *proc_sys_make_inode(struct super_block *sb,
30 struct ctl_table_header *head, struct ctl_table *table) 396 struct ctl_table_header *head, struct ctl_table *table)
31{ 397{
@@ -45,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
45 411
46 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 412 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
47 inode->i_mode = table->mode; 413 inode->i_mode = table->mode;
48 if (!table->child) { 414 if (!S_ISDIR(table->mode)) {
49 inode->i_mode |= S_IFREG; 415 inode->i_mode |= S_IFREG;
50 inode->i_op = &proc_sys_inode_operations; 416 inode->i_op = &proc_sys_inode_operations;
51 inode->i_fop = &proc_sys_file_operations; 417 inode->i_fop = &proc_sys_file_operations;
52 } else { 418 } else {
53 inode->i_mode |= S_IFDIR; 419 inode->i_mode |= S_IFDIR;
54 clear_nlink(inode);
55 inode->i_op = &proc_sys_dir_operations; 420 inode->i_op = &proc_sys_dir_operations;
56 inode->i_fop = &proc_sys_dir_file_operations; 421 inode->i_fop = &proc_sys_dir_file_operations;
57 } 422 }
@@ -59,70 +424,42 @@ out:
59 return inode; 424 return inode;
60} 425}
61 426
62static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
63{
64 int len;
65 for ( ; p->procname; p++) {
66
67 if (!p->procname)
68 continue;
69
70 len = strlen(p->procname);
71 if (len != name->len)
72 continue;
73
74 if (memcmp(p->procname, name->name, len) != 0)
75 continue;
76
77 /* I have a match */
78 return p;
79 }
80 return NULL;
81}
82
83static struct ctl_table_header *grab_header(struct inode *inode) 427static struct ctl_table_header *grab_header(struct inode *inode)
84{ 428{
85 if (PROC_I(inode)->sysctl) 429 struct ctl_table_header *head = PROC_I(inode)->sysctl;
86 return sysctl_head_grab(PROC_I(inode)->sysctl); 430 if (!head)
87 else 431 head = &sysctl_table_root.default_set.dir.header;
88 return sysctl_head_next(NULL); 432 return sysctl_head_grab(head);
89} 433}
90 434
91static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
92 struct nameidata *nd) 436 struct nameidata *nd)
93{ 437{
94 struct ctl_table_header *head = grab_header(dir); 438 struct ctl_table_header *head = grab_header(dir);
95 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
96 struct ctl_table_header *h = NULL; 439 struct ctl_table_header *h = NULL;
97 struct qstr *name = &dentry->d_name; 440 struct qstr *name = &dentry->d_name;
98 struct ctl_table *p; 441 struct ctl_table *p;
99 struct inode *inode; 442 struct inode *inode;
100 struct dentry *err = ERR_PTR(-ENOENT); 443 struct dentry *err = ERR_PTR(-ENOENT);
444 struct ctl_dir *ctl_dir;
445 int ret;
101 446
102 if (IS_ERR(head)) 447 if (IS_ERR(head))
103 return ERR_CAST(head); 448 return ERR_CAST(head);
104 449
105 if (table && !table->child) { 450 ctl_dir = container_of(head, struct ctl_dir, header);
106 WARN_ON(1);
107 goto out;
108 }
109
110 table = table ? table->child : head->ctl_table;
111
112 p = find_in_table(table, name);
113 if (!p) {
114 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
115 if (h->attached_to != table)
116 continue;
117 p = find_in_table(h->attached_by, name);
118 if (p)
119 break;
120 }
121 }
122 451
452 p = lookup_entry(&h, ctl_dir, name->name, name->len);
123 if (!p) 453 if (!p)
124 goto out; 454 goto out;
125 455
456 if (S_ISLNK(p->mode)) {
457 ret = sysctl_follow_link(&h, &p, current->nsproxy);
458 err = ERR_PTR(ret);
459 if (ret)
460 goto out;
461 }
462
126 err = ERR_PTR(-ENOMEM); 463 err = ERR_PTR(-ENOMEM);
127 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
128 if (h) 465 if (h)
@@ -190,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
190 527
191static int proc_sys_open(struct inode *inode, struct file *filp) 528static int proc_sys_open(struct inode *inode, struct file *filp)
192{ 529{
530 struct ctl_table_header *head = grab_header(inode);
193 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 531 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
194 532
533 /* sysctl was unregistered */
534 if (IS_ERR(head))
535 return PTR_ERR(head);
536
195 if (table->poll) 537 if (table->poll)
196 filp->private_data = proc_sys_poll_event(table->poll); 538 filp->private_data = proc_sys_poll_event(table->poll);
197 539
540 sysctl_head_finish(head);
541
198 return 0; 542 return 0;
199} 543}
200 544
201static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 545static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
202{ 546{
203 struct inode *inode = filp->f_path.dentry->d_inode; 547 struct inode *inode = filp->f_path.dentry->d_inode;
548 struct ctl_table_header *head = grab_header(inode);
204 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 549 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
205 unsigned long event = (unsigned long)filp->private_data;
206 unsigned int ret = DEFAULT_POLLMASK; 550 unsigned int ret = DEFAULT_POLLMASK;
551 unsigned long event;
552
553 /* sysctl was unregistered */
554 if (IS_ERR(head))
555 return POLLERR | POLLHUP;
207 556
208 if (!table->proc_handler) 557 if (!table->proc_handler)
209 goto out; 558 goto out;
@@ -211,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
211 if (!table->poll) 560 if (!table->poll)
212 goto out; 561 goto out;
213 562
563 event = (unsigned long)filp->private_data;
214 poll_wait(filp, &table->poll->wait, wait); 564 poll_wait(filp, &table->poll->wait, wait);
215 565
216 if (event != atomic_read(&table->poll->event)) { 566 if (event != atomic_read(&table->poll->event)) {
@@ -219,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
219 } 569 }
220 570
221out: 571out:
572 sysctl_head_finish(head);
573
222 return ret; 574 return ret;
223} 575}
224 576
@@ -260,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
260 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 612 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
261} 613}
262 614
615static int proc_sys_link_fill_cache(struct file *filp, void *dirent,
616 filldir_t filldir,
617 struct ctl_table_header *head,
618 struct ctl_table *table)
619{
620 int err, ret = 0;
621 head = sysctl_head_grab(head);
622
623 if (S_ISLNK(table->mode)) {
624 /* It is not an error if we can not follow the link ignore it */
625 err = sysctl_follow_link(&head, &table, current->nsproxy);
626 if (err)
627 goto out;
628 }
629
630 ret = proc_sys_fill_cache(filp, dirent, filldir, head, table);
631out:
632 sysctl_head_finish(head);
633 return ret;
634}
635
263static int scan(struct ctl_table_header *head, ctl_table *table, 636static int scan(struct ctl_table_header *head, ctl_table *table,
264 unsigned long *pos, struct file *file, 637 unsigned long *pos, struct file *file,
265 void *dirent, filldir_t filldir) 638 void *dirent, filldir_t filldir)
266{ 639{
640 int res;
267 641
268 for (; table->procname; table++, (*pos)++) { 642 if ((*pos)++ < file->f_pos)
269 int res; 643 return 0;
270
271 /* Can't do anything without a proc name */
272 if (!table->procname)
273 continue;
274
275 if (*pos < file->f_pos)
276 continue;
277 644
645 if (unlikely(S_ISLNK(table->mode)))
646 res = proc_sys_link_fill_cache(file, dirent, filldir, head, table);
647 else
278 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 648 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
279 if (res)
280 return res;
281 649
282 file->f_pos = *pos + 1; 650 if (res == 0)
283 } 651 file->f_pos = *pos;
284 return 0; 652
653 return res;
285} 654}
286 655
287static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 656static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -289,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
289 struct dentry *dentry = filp->f_path.dentry; 658 struct dentry *dentry = filp->f_path.dentry;
290 struct inode *inode = dentry->d_inode; 659 struct inode *inode = dentry->d_inode;
291 struct ctl_table_header *head = grab_header(inode); 660 struct ctl_table_header *head = grab_header(inode);
292 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
293 struct ctl_table_header *h = NULL; 661 struct ctl_table_header *h = NULL;
662 struct ctl_table *entry;
663 struct ctl_dir *ctl_dir;
294 unsigned long pos; 664 unsigned long pos;
295 int ret = -EINVAL; 665 int ret = -EINVAL;
296 666
297 if (IS_ERR(head)) 667 if (IS_ERR(head))
298 return PTR_ERR(head); 668 return PTR_ERR(head);
299 669
300 if (table && !table->child) { 670 ctl_dir = container_of(head, struct ctl_dir, header);
301 WARN_ON(1);
302 goto out;
303 }
304
305 table = table ? table->child : head->ctl_table;
306 671
307 ret = 0; 672 ret = 0;
308 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 673 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -320,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
320 } 685 }
321 pos = 2; 686 pos = 2;
322 687
323 ret = scan(head, table, &pos, filp, dirent, filldir); 688 for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
324 if (ret) 689 ret = scan(h, entry, &pos, filp, dirent, filldir);
325 goto out;
326
327 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
328 if (h->attached_to != table)
329 continue;
330 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
331 if (ret) { 690 if (ret) {
332 sysctl_head_finish(h); 691 sysctl_head_finish(h);
333 break; 692 break;
@@ -447,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry)
447 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 806 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
448} 807}
449 808
809static int sysctl_is_seen(struct ctl_table_header *p)
810{
811 struct ctl_table_set *set = p->set;
812 int res;
813 spin_lock(&sysctl_lock);
814 if (p->unregistering)
815 res = 0;
816 else if (!set->is_seen)
817 res = 1;
818 else
819 res = set->is_seen(set);
820 spin_unlock(&sysctl_lock);
821 return res;
822}
823
450static int proc_sys_compare(const struct dentry *parent, 824static int proc_sys_compare(const struct dentry *parent,
451 const struct inode *pinode, 825 const struct inode *pinode,
452 const struct dentry *dentry, const struct inode *inode, 826 const struct dentry *dentry, const struct inode *inode,
@@ -472,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = {
472 .d_compare = proc_sys_compare, 846 .d_compare = proc_sys_compare,
473}; 847};
474 848
849static struct ctl_dir *find_subdir(struct ctl_dir *dir,
850 const char *name, int namelen)
851{
852 struct ctl_table_header *head;
853 struct ctl_table *entry;
854
855 entry = find_entry(&head, dir, name, namelen);
856 if (!entry)
857 return ERR_PTR(-ENOENT);
858 if (!S_ISDIR(entry->mode))
859 return ERR_PTR(-ENOTDIR);
860 return container_of(head, struct ctl_dir, header);
861}
862
863static struct ctl_dir *new_dir(struct ctl_table_set *set,
864 const char *name, int namelen)
865{
866 struct ctl_table *table;
867 struct ctl_dir *new;
868 struct ctl_node *node;
869 char *new_name;
870
871 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
872 sizeof(struct ctl_table)*2 + namelen + 1,
873 GFP_KERNEL);
874 if (!new)
875 return NULL;
876
877 node = (struct ctl_node *)(new + 1);
878 table = (struct ctl_table *)(node + 1);
879 new_name = (char *)(table + 2);
880 memcpy(new_name, name, namelen);
881 new_name[namelen] = '\0';
882 table[0].procname = new_name;
883 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
884 init_header(&new->header, set->dir.header.root, set, node, table);
885
886 return new;
887}
888
889/**
890 * get_subdir - find or create a subdir with the specified name.
891 * @dir: Directory to create the subdirectory in
892 * @name: The name of the subdirectory to find or create
893 * @namelen: The length of name
894 *
895 * Takes a directory with an elevated reference count so we know that
896 * if we drop the lock the directory will not go away. Upon success
897 * the reference is moved from @dir to the returned subdirectory.
898 * Upon error an error code is returned and the reference on @dir is
899 * simply dropped.
900 */
901static struct ctl_dir *get_subdir(struct ctl_dir *dir,
902 const char *name, int namelen)
903{
904 struct ctl_table_set *set = dir->header.set;
905 struct ctl_dir *subdir, *new = NULL;
906 int err;
907
908 spin_lock(&sysctl_lock);
909 subdir = find_subdir(dir, name, namelen);
910 if (!IS_ERR(subdir))
911 goto found;
912 if (PTR_ERR(subdir) != -ENOENT)
913 goto failed;
914
915 spin_unlock(&sysctl_lock);
916 new = new_dir(set, name, namelen);
917 spin_lock(&sysctl_lock);
918 subdir = ERR_PTR(-ENOMEM);
919 if (!new)
920 goto failed;
921
922 /* Was the subdir added while we dropped the lock? */
923 subdir = find_subdir(dir, name, namelen);
924 if (!IS_ERR(subdir))
925 goto found;
926 if (PTR_ERR(subdir) != -ENOENT)
927 goto failed;
928
929 /* Nope. Use our freshly made directory entry. */
930 err = insert_header(dir, &new->header);
931 subdir = ERR_PTR(err);
932 if (err)
933 goto failed;
934 subdir = new;
935found:
936 subdir->header.nreg++;
937failed:
938 if (unlikely(IS_ERR(subdir))) {
939 printk(KERN_ERR "sysctl could not get directory: ");
940 sysctl_print_dir(dir);
941 printk(KERN_CONT "/%*.*s %ld\n",
942 namelen, namelen, name, PTR_ERR(subdir));
943 }
944 drop_sysctl_table(&dir->header);
945 if (new)
946 drop_sysctl_table(&new->header);
947 spin_unlock(&sysctl_lock);
948 return subdir;
949}
950
951static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
952{
953 struct ctl_dir *parent;
954 const char *procname;
955 if (!dir->header.parent)
956 return &set->dir;
957 parent = xlate_dir(set, dir->header.parent);
958 if (IS_ERR(parent))
959 return parent;
960 procname = dir->header.ctl_table[0].procname;
961 return find_subdir(parent, procname, strlen(procname));
962}
963
964static int sysctl_follow_link(struct ctl_table_header **phead,
965 struct ctl_table **pentry, struct nsproxy *namespaces)
966{
967 struct ctl_table_header *head;
968 struct ctl_table_root *root;
969 struct ctl_table_set *set;
970 struct ctl_table *entry;
971 struct ctl_dir *dir;
972 int ret;
973
974 ret = 0;
975 spin_lock(&sysctl_lock);
976 root = (*pentry)->data;
977 set = lookup_header_set(root, namespaces);
978 dir = xlate_dir(set, (*phead)->parent);
979 if (IS_ERR(dir))
980 ret = PTR_ERR(dir);
981 else {
982 const char *procname = (*pentry)->procname;
983 head = NULL;
984 entry = find_entry(&head, dir, procname, strlen(procname));
985 ret = -ENOENT;
986 if (entry && use_table(head)) {
987 unuse_table(*phead);
988 *phead = head;
989 *pentry = entry;
990 ret = 0;
991 }
992 }
993
994 spin_unlock(&sysctl_lock);
995 return ret;
996}
997
998static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
999{
1000 struct va_format vaf;
1001 va_list args;
1002
1003 va_start(args, fmt);
1004 vaf.fmt = fmt;
1005 vaf.va = &args;
1006
1007 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
1008 path, table->procname, &vaf);
1009
1010 va_end(args);
1011 return -EINVAL;
1012}
1013
1014static int sysctl_check_table(const char *path, struct ctl_table *table)
1015{
1016 int err = 0;
1017 for (; table->procname; table++) {
1018 if (table->child)
1019 err = sysctl_err(path, table, "Not a file");
1020
1021 if ((table->proc_handler == proc_dostring) ||
1022 (table->proc_handler == proc_dointvec) ||
1023 (table->proc_handler == proc_dointvec_minmax) ||
1024 (table->proc_handler == proc_dointvec_jiffies) ||
1025 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1026 (table->proc_handler == proc_dointvec_ms_jiffies) ||
1027 (table->proc_handler == proc_doulongvec_minmax) ||
1028 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1029 if (!table->data)
1030 err = sysctl_err(path, table, "No data");
1031 if (!table->maxlen)
1032 err = sysctl_err(path, table, "No maxlen");
1033 }
1034 if (!table->proc_handler)
1035 err = sysctl_err(path, table, "No proc_handler");
1036
1037 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1038 err = sysctl_err(path, table, "bogus .mode 0%o",
1039 table->mode);
1040 }
1041 return err;
1042}
1043
1044static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1045 struct ctl_table_root *link_root)
1046{
1047 struct ctl_table *link_table, *entry, *link;
1048 struct ctl_table_header *links;
1049 struct ctl_node *node;
1050 char *link_name;
1051 int nr_entries, name_bytes;
1052
1053 name_bytes = 0;
1054 nr_entries = 0;
1055 for (entry = table; entry->procname; entry++) {
1056 nr_entries++;
1057 name_bytes += strlen(entry->procname) + 1;
1058 }
1059
1060 links = kzalloc(sizeof(struct ctl_table_header) +
1061 sizeof(struct ctl_node)*nr_entries +
1062 sizeof(struct ctl_table)*(nr_entries + 1) +
1063 name_bytes,
1064 GFP_KERNEL);
1065
1066 if (!links)
1067 return NULL;
1068
1069 node = (struct ctl_node *)(links + 1);
1070 link_table = (struct ctl_table *)(node + nr_entries);
1071 link_name = (char *)&link_table[nr_entries + 1];
1072
1073 for (link = link_table, entry = table; entry->procname; link++, entry++) {
1074 int len = strlen(entry->procname) + 1;
1075 memcpy(link_name, entry->procname, len);
1076 link->procname = link_name;
1077 link->mode = S_IFLNK|S_IRWXUGO;
1078 link->data = link_root;
1079 link_name += len;
1080 }
1081 init_header(links, dir->header.root, dir->header.set, node, link_table);
1082 links->nreg = nr_entries;
1083
1084 return links;
1085}
1086
1087static bool get_links(struct ctl_dir *dir,
1088 struct ctl_table *table, struct ctl_table_root *link_root)
1089{
1090 struct ctl_table_header *head;
1091 struct ctl_table *entry, *link;
1092
1093 /* Are there links available for every entry in table? */
1094 for (entry = table; entry->procname; entry++) {
1095 const char *procname = entry->procname;
1096 link = find_entry(&head, dir, procname, strlen(procname));
1097 if (!link)
1098 return false;
1099 if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1100 continue;
1101 if (S_ISLNK(link->mode) && (link->data == link_root))
1102 continue;
1103 return false;
1104 }
1105
1106 /* The checks passed. Increase the registration count on the links */
1107 for (entry = table; entry->procname; entry++) {
1108 const char *procname = entry->procname;
1109 link = find_entry(&head, dir, procname, strlen(procname));
1110 head->nreg++;
1111 }
1112 return true;
1113}
1114
1115static int insert_links(struct ctl_table_header *head)
1116{
1117 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1118 struct ctl_dir *core_parent = NULL;
1119 struct ctl_table_header *links;
1120 int err;
1121
1122 if (head->set == root_set)
1123 return 0;
1124
1125 core_parent = xlate_dir(root_set, head->parent);
1126 if (IS_ERR(core_parent))
1127 return 0;
1128
1129 if (get_links(core_parent, head->ctl_table, head->root))
1130 return 0;
1131
1132 core_parent->header.nreg++;
1133 spin_unlock(&sysctl_lock);
1134
1135 links = new_links(core_parent, head->ctl_table, head->root);
1136
1137 spin_lock(&sysctl_lock);
1138 err = -ENOMEM;
1139 if (!links)
1140 goto out;
1141
1142 err = 0;
1143 if (get_links(core_parent, head->ctl_table, head->root)) {
1144 kfree(links);
1145 goto out;
1146 }
1147
1148 err = insert_header(core_parent, links);
1149 if (err)
1150 kfree(links);
1151out:
1152 drop_sysctl_table(&core_parent->header);
1153 return err;
1154}
1155
1156/**
1157 * __register_sysctl_table - register a leaf sysctl table
1158 * @set: Sysctl tree to register on
1159 * @path: The path to the directory the sysctl table is in.
1160 * @table: the top-level table structure
1161 *
1162 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1163 * array. A completely 0 filled entry terminates the table.
1164 *
1165 * The members of the &struct ctl_table structure are used as follows:
1166 *
1167 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1168 * enter a sysctl file
1169 *
1170 * data - a pointer to data for use by proc_handler
1171 *
1172 * maxlen - the maximum size in bytes of the data
1173 *
1174 * mode - the file permissions for the /proc/sys file
1175 *
1176 * child - must be %NULL.
1177 *
1178 * proc_handler - the text handler routine (described below)
1179 *
1180 * extra1, extra2 - extra pointers usable by the proc handler routines
1181 *
1182 * Leaf nodes in the sysctl tree will be represented by a single file
1183 * under /proc; non-leaf nodes will be represented by directories.
1184 *
1185 * There must be a proc_handler routine for any terminal nodes.
1186 * Several default handlers are available to cover common cases -
1187 *
1188 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1189 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1190 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1191 *
1192 * It is the handler's job to read the input buffer from user memory
1193 * and process it. The handler should return 0 on success.
1194 *
1195 * This routine returns %NULL on a failure to register, and a pointer
1196 * to the table header on success.
1197 */
1198struct ctl_table_header *__register_sysctl_table(
1199 struct ctl_table_set *set,
1200 const char *path, struct ctl_table *table)
1201{
1202 struct ctl_table_root *root = set->dir.header.root;
1203 struct ctl_table_header *header;
1204 const char *name, *nextname;
1205 struct ctl_dir *dir;
1206 struct ctl_table *entry;
1207 struct ctl_node *node;
1208 int nr_entries = 0;
1209
1210 for (entry = table; entry->procname; entry++)
1211 nr_entries++;
1212
1213 header = kzalloc(sizeof(struct ctl_table_header) +
1214 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1215 if (!header)
1216 return NULL;
1217
1218 node = (struct ctl_node *)(header + 1);
1219 init_header(header, root, set, node, table);
1220 if (sysctl_check_table(path, table))
1221 goto fail;
1222
1223 spin_lock(&sysctl_lock);
1224 dir = &set->dir;
1225 /* Reference moved down the diretory tree get_subdir */
1226 dir->header.nreg++;
1227 spin_unlock(&sysctl_lock);
1228
1229 /* Find the directory for the ctl_table */
1230 for (name = path; name; name = nextname) {
1231 int namelen;
1232 nextname = strchr(name, '/');
1233 if (nextname) {
1234 namelen = nextname - name;
1235 nextname++;
1236 } else {
1237 namelen = strlen(name);
1238 }
1239 if (namelen == 0)
1240 continue;
1241
1242 dir = get_subdir(dir, name, namelen);
1243 if (IS_ERR(dir))
1244 goto fail;
1245 }
1246
1247 spin_lock(&sysctl_lock);
1248 if (insert_header(dir, header))
1249 goto fail_put_dir_locked;
1250
1251 drop_sysctl_table(&dir->header);
1252 spin_unlock(&sysctl_lock);
1253
1254 return header;
1255
1256fail_put_dir_locked:
1257 drop_sysctl_table(&dir->header);
1258 spin_unlock(&sysctl_lock);
1259fail:
1260 kfree(header);
1261 dump_stack();
1262 return NULL;
1263}
1264
1265/**
1266 * register_sysctl - register a sysctl table
1267 * @path: The path to the directory the sysctl table is in.
1268 * @table: the table structure
1269 *
1270 * Register a sysctl table. @table should be a filled in ctl_table
1271 * array. A completely 0 filled entry terminates the table.
1272 *
1273 * See __register_sysctl_table for more details.
1274 */
1275struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1276{
1277 return __register_sysctl_table(&sysctl_table_root.default_set,
1278 path, table);
1279}
1280EXPORT_SYMBOL(register_sysctl);
1281
/*
 * Append "@name/" to the path being built in the buffer starting at @path.
 *
 * @path: start of the buffer; it is treated as PATH_MAX bytes long
 * @pos:  current end of the string inside that buffer (where to write)
 * @name: component to append
 *
 * On success the component, a trailing '/' and a terminating NUL are
 * written at @pos, and a pointer to the new NUL is returned so the next
 * component can be appended there.
 *
 * Returns NULL, without writing anything, when the result would not leave
 * the path strictly shorter than PATH_MAX.  Lengths are kept in size_t and
 * the bound is checked by subtraction, so neither an oversized @name nor
 * the addition itself can wrap and slip past the check.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	const size_t limit = PATH_MAX;
	size_t used = pos - path;
	size_t namelen = strlen(name);

	/* Same bound as "used + namelen + 2 >= PATH_MAX", minus the overflow. */
	if (used + 2 >= limit || namelen >= limit - used - 2)
		return NULL;

	memcpy(pos, name, namelen);
	pos[namelen] = '/';
	pos[namelen + 1] = '\0';
	return pos + namelen + 1;
}
1294
1295static int count_subheaders(struct ctl_table *table)
1296{
1297 int has_files = 0;
1298 int nr_subheaders = 0;
1299 struct ctl_table *entry;
1300
1301 /* special case: no directory and empty directory */
1302 if (!table || !table->procname)
1303 return 1;
1304
1305 for (entry = table; entry->procname; entry++) {
1306 if (entry->child)
1307 nr_subheaders += count_subheaders(entry->child);
1308 else
1309 has_files = 1;
1310 }
1311 return nr_subheaders + has_files;
1312}
1313
1314static int register_leaf_sysctl_tables(const char *path, char *pos,
1315 struct ctl_table_header ***subheader, struct ctl_table_set *set,
1316 struct ctl_table *table)
1317{
1318 struct ctl_table *ctl_table_arg = NULL;
1319 struct ctl_table *entry, *files;
1320 int nr_files = 0;
1321 int nr_dirs = 0;
1322 int err = -ENOMEM;
1323
1324 for (entry = table; entry->procname; entry++) {
1325 if (entry->child)
1326 nr_dirs++;
1327 else
1328 nr_files++;
1329 }
1330
1331 files = table;
1332 /* If there are mixed files and directories we need a new table */
1333 if (nr_dirs && nr_files) {
1334 struct ctl_table *new;
1335 files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1336 GFP_KERNEL);
1337 if (!files)
1338 goto out;
1339
1340 ctl_table_arg = files;
1341 for (new = files, entry = table; entry->procname; entry++) {
1342 if (entry->child)
1343 continue;
1344 *new = *entry;
1345 new++;
1346 }
1347 }
1348
1349 /* Register everything except a directory full of subdirectories */
1350 if (nr_files || !nr_dirs) {
1351 struct ctl_table_header *header;
1352 header = __register_sysctl_table(set, path, files);
1353 if (!header) {
1354 kfree(ctl_table_arg);
1355 goto out;
1356 }
1357
1358 /* Remember if we need to free the file table */
1359 header->ctl_table_arg = ctl_table_arg;
1360 **subheader = header;
1361 (*subheader)++;
1362 }
1363
1364 /* Recurse into the subdirectories. */
1365 for (entry = table; entry->procname; entry++) {
1366 char *child_pos;
1367
1368 if (!entry->child)
1369 continue;
1370
1371 err = -ENAMETOOLONG;
1372 child_pos = append_path(path, pos, entry->procname);
1373 if (!child_pos)
1374 goto out;
1375
1376 err = register_leaf_sysctl_tables(path, child_pos, subheader,
1377 set, entry->child);
1378 pos[0] = '\0';
1379 if (err)
1380 goto out;
1381 }
1382 err = 0;
1383out:
1384 /* On failure our caller will unregister all registered subheaders */
1385 return err;
1386}
1387
1388/**
1389 * __register_sysctl_paths - register a sysctl table hierarchy
1390 * @set: Sysctl tree to register on
1391 * @path: The path to the directory the sysctl table is in.
1392 * @table: the top-level table structure
1393 *
1394 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1395 * array. A completely 0 filled entry terminates the table.
1396 *
1397 * See __register_sysctl_table for more details.
1398 */
1399struct ctl_table_header *__register_sysctl_paths(
1400 struct ctl_table_set *set,
1401 const struct ctl_path *path, struct ctl_table *table)
1402{
1403 struct ctl_table *ctl_table_arg = table;
1404 int nr_subheaders = count_subheaders(table);
1405 struct ctl_table_header *header = NULL, **subheaders, **subheader;
1406 const struct ctl_path *component;
1407 char *new_path, *pos;
1408
1409 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1410 if (!new_path)
1411 return NULL;
1412
1413 pos[0] = '\0';
1414 for (component = path; component->procname; component++) {
1415 pos = append_path(new_path, pos, component->procname);
1416 if (!pos)
1417 goto out;
1418 }
1419 while (table->procname && table->child && !table[1].procname) {
1420 pos = append_path(new_path, pos, table->procname);
1421 if (!pos)
1422 goto out;
1423 table = table->child;
1424 }
1425 if (nr_subheaders == 1) {
1426 header = __register_sysctl_table(set, new_path, table);
1427 if (header)
1428 header->ctl_table_arg = ctl_table_arg;
1429 } else {
1430 header = kzalloc(sizeof(*header) +
1431 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1432 if (!header)
1433 goto out;
1434
1435 subheaders = (struct ctl_table_header **) (header + 1);
1436 subheader = subheaders;
1437 header->ctl_table_arg = ctl_table_arg;
1438
1439 if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1440 set, table))
1441 goto err_register_leaves;
1442 }
1443
1444out:
1445 kfree(new_path);
1446 return header;
1447
1448err_register_leaves:
1449 while (subheader > subheaders) {
1450 struct ctl_table_header *subh = *(--subheader);
1451 struct ctl_table *table = subh->ctl_table_arg;
1452 unregister_sysctl_table(subh);
1453 kfree(table);
1454 }
1455 kfree(header);
1456 header = NULL;
1457 goto out;
1458}
1459
1460/**
1461 * register_sysctl_table_path - register a sysctl table hierarchy
1462 * @path: The path to the directory the sysctl table is in.
1463 * @table: the top-level table structure
1464 *
1465 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1466 * array. A completely 0 filled entry terminates the table.
1467 *
1468 * See __register_sysctl_paths for more details.
1469 */
1470struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1471 struct ctl_table *table)
1472{
1473 return __register_sysctl_paths(&sysctl_table_root.default_set,
1474 path, table);
1475}
1476EXPORT_SYMBOL(register_sysctl_paths);
1477
1478/**
1479 * register_sysctl_table - register a sysctl table hierarchy
1480 * @table: the top-level table structure
1481 *
1482 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1483 * array. A completely 0 filled entry terminates the table.
1484 *
1485 * See register_sysctl_paths for more details.
1486 */
1487struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1488{
1489 static const struct ctl_path null_path[] = { {} };
1490
1491 return register_sysctl_paths(null_path, table);
1492}
1493EXPORT_SYMBOL(register_sysctl_table);
1494
1495static void put_links(struct ctl_table_header *header)
1496{
1497 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1498 struct ctl_table_root *root = header->root;
1499 struct ctl_dir *parent = header->parent;
1500 struct ctl_dir *core_parent;
1501 struct ctl_table *entry;
1502
1503 if (header->set == root_set)
1504 return;
1505
1506 core_parent = xlate_dir(root_set, parent);
1507 if (IS_ERR(core_parent))
1508 return;
1509
1510 for (entry = header->ctl_table; entry->procname; entry++) {
1511 struct ctl_table_header *link_head;
1512 struct ctl_table *link;
1513 const char *name = entry->procname;
1514
1515 link = find_entry(&link_head, core_parent, name, strlen(name));
1516 if (link &&
1517 ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1518 (S_ISLNK(link->mode) && (link->data == root)))) {
1519 drop_sysctl_table(link_head);
1520 }
1521 else {
1522 printk(KERN_ERR "sysctl link missing during unregister: ");
1523 sysctl_print_dir(parent);
1524 printk(KERN_CONT "/%s\n", name);
1525 }
1526 }
1527}
1528
1529static void drop_sysctl_table(struct ctl_table_header *header)
1530{
1531 struct ctl_dir *parent = header->parent;
1532
1533 if (--header->nreg)
1534 return;
1535
1536 put_links(header);
1537 start_unregistering(header);
1538 if (!--header->count)
1539 kfree_rcu(header, rcu);
1540
1541 if (parent)
1542 drop_sysctl_table(&parent->header);
1543}
1544
1545/**
1546 * unregister_sysctl_table - unregister a sysctl table hierarchy
1547 * @header: the header returned from register_sysctl_table
1548 *
1549 * Unregisters the sysctl table and all children. proc entries may not
1550 * actually be removed until they are no longer used by anyone.
1551 */
1552void unregister_sysctl_table(struct ctl_table_header * header)
1553{
1554 int nr_subheaders;
1555 might_sleep();
1556
1557 if (header == NULL)
1558 return;
1559
1560 nr_subheaders = count_subheaders(header->ctl_table_arg);
1561 if (unlikely(nr_subheaders > 1)) {
1562 struct ctl_table_header **subheaders;
1563 int i;
1564
1565 subheaders = (struct ctl_table_header **)(header + 1);
1566 for (i = nr_subheaders -1; i >= 0; i--) {
1567 struct ctl_table_header *subh = subheaders[i];
1568 struct ctl_table *table = subh->ctl_table_arg;
1569 unregister_sysctl_table(subh);
1570 kfree(table);
1571 }
1572 kfree(header);
1573 return;
1574 }
1575
1576 spin_lock(&sysctl_lock);
1577 drop_sysctl_table(header);
1578 spin_unlock(&sysctl_lock);
1579}
1580EXPORT_SYMBOL(unregister_sysctl_table);
1581
1582void setup_sysctl_set(struct ctl_table_set *set,
1583 struct ctl_table_root *root,
1584 int (*is_seen)(struct ctl_table_set *))
1585{
1586 memset(set, 0, sizeof(*set));
1587 set->is_seen = is_seen;
1588 init_header(&set->dir.header, root, set, NULL, root_table);
1589}
1590
1591void retire_sysctl_set(struct ctl_table_set *set)
1592{
1593 WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1594}
1595
475int __init proc_sys_init(void) 1596int __init proc_sys_init(void)
476{ 1597{
477 struct proc_dir_entry *proc_sys_root; 1598 struct proc_dir_entry *proc_sys_root;
@@ -480,5 +1601,6 @@ int __init proc_sys_init(void)
480 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1601 proc_sys_root->proc_iops = &proc_sys_dir_operations;
481 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1602 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
482 proc_sys_root->nlink = 0; 1603 proc_sys_root->nlink = 0;
483 return 0; 1604
1605 return sysctl_init();
484} 1606}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index bb9127dd814b..c34b4c82b0dc 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -932,34 +932,14 @@ enum
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h> 933#include <linux/rcupdate.h>
934#include <linux/wait.h> 934#include <linux/wait.h>
935#include <linux/rbtree.h>
935 936
936/* For the /proc/sys support */ 937/* For the /proc/sys support */
937struct ctl_table; 938struct ctl_table;
938struct nsproxy; 939struct nsproxy;
939struct ctl_table_root; 940struct ctl_table_root;
940
941struct ctl_table_set {
942 struct list_head list;
943 struct ctl_table_set *parent;
944 int (*is_seen)(struct ctl_table_set *);
945};
946
947extern void setup_sysctl_set(struct ctl_table_set *p,
948 struct ctl_table_set *parent,
949 int (*is_seen)(struct ctl_table_set *));
950
951struct ctl_table_header; 941struct ctl_table_header;
952 942struct ctl_dir;
953extern void sysctl_head_get(struct ctl_table_header *);
954extern void sysctl_head_put(struct ctl_table_header *);
955extern int sysctl_is_seen(struct ctl_table_header *);
956extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *);
957extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev);
958extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
959 struct ctl_table_header *prev);
960extern void sysctl_head_finish(struct ctl_table_header *prev);
961extern int sysctl_perm(struct ctl_table_root *root,
962 struct ctl_table *table, int op);
963 943
964typedef struct ctl_table ctl_table; 944typedef struct ctl_table ctl_table;
965 945
@@ -1023,8 +1003,6 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
1023 return (void *)(unsigned long)atomic_read(&poll->event); 1003 return (void *)(unsigned long)atomic_read(&poll->event);
1024} 1004}
1025 1005
1026void proc_sys_poll_notify(struct ctl_table_poll *poll);
1027
1028#define __CTL_TABLE_POLL_INITIALIZER(name) { \ 1006#define __CTL_TABLE_POLL_INITIALIZER(name) { \
1029 .event = ATOMIC_INIT(0), \ 1007 .event = ATOMIC_INIT(0), \
1030 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } 1008 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
@@ -1039,21 +1017,16 @@ struct ctl_table
1039 void *data; 1017 void *data;
1040 int maxlen; 1018 int maxlen;
1041 umode_t mode; 1019 umode_t mode;
1042 struct ctl_table *child; 1020 struct ctl_table *child; /* Deprecated */
1043 struct ctl_table *parent; /* Automatically set */
1044 proc_handler *proc_handler; /* Callback for text formatting */ 1021 proc_handler *proc_handler; /* Callback for text formatting */
1045 struct ctl_table_poll *poll; 1022 struct ctl_table_poll *poll;
1046 void *extra1; 1023 void *extra1;
1047 void *extra2; 1024 void *extra2;
1048}; 1025};
1049 1026
1050struct ctl_table_root { 1027struct ctl_node {
1051 struct list_head root_list; 1028 struct rb_node node;
1052 struct ctl_table_set default_set; 1029 struct ctl_table_header *header;
1053 struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
1054 struct nsproxy *namespaces);
1055 int (*permissions)(struct ctl_table_root *root,
1056 struct nsproxy *namespaces, struct ctl_table *table);
1057}; 1030};
1058 1031
1059/* struct ctl_table_header is used to maintain dynamic lists of 1032/* struct ctl_table_header is used to maintain dynamic lists of
@@ -1063,9 +1036,9 @@ struct ctl_table_header
1063 union { 1036 union {
1064 struct { 1037 struct {
1065 struct ctl_table *ctl_table; 1038 struct ctl_table *ctl_table;
1066 struct list_head ctl_entry;
1067 int used; 1039 int used;
1068 int count; 1040 int count;
1041 int nreg;
1069 }; 1042 };
1070 struct rcu_head rcu; 1043 struct rcu_head rcu;
1071 }; 1044 };
@@ -1073,9 +1046,27 @@ struct ctl_table_header
1073 struct ctl_table *ctl_table_arg; 1046 struct ctl_table *ctl_table_arg;
1074 struct ctl_table_root *root; 1047 struct ctl_table_root *root;
1075 struct ctl_table_set *set; 1048 struct ctl_table_set *set;
1076 struct ctl_table *attached_by; 1049 struct ctl_dir *parent;
1077 struct ctl_table *attached_to; 1050 struct ctl_node *node;
1078 struct ctl_table_header *parent; 1051};
1052
1053struct ctl_dir {
1054 /* Header must be at the start of ctl_dir */
1055 struct ctl_table_header header;
1056 struct rb_root root;
1057};
1058
1059struct ctl_table_set {
1060 int (*is_seen)(struct ctl_table_set *);
1061 struct ctl_dir dir;
1062};
1063
1064struct ctl_table_root {
1065 struct ctl_table_set default_set;
1066 struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
1067 struct nsproxy *namespaces);
1068 int (*permissions)(struct ctl_table_root *root,
1069 struct nsproxy *namespaces, struct ctl_table *table);
1079}; 1070};
1080 1071
1081/* struct ctl_path describes where in the hierarchy a table is added */ 1072/* struct ctl_path describes where in the hierarchy a table is added */
@@ -1083,16 +1074,53 @@ struct ctl_path {
1083 const char *procname; 1074 const char *procname;
1084}; 1075};
1085 1076
1077#ifdef CONFIG_SYSCTL
1078
1079void proc_sys_poll_notify(struct ctl_table_poll *poll);
1080
1081extern void setup_sysctl_set(struct ctl_table_set *p,
1082 struct ctl_table_root *root,
1083 int (*is_seen)(struct ctl_table_set *));
1084extern void retire_sysctl_set(struct ctl_table_set *set);
1085
1086void register_sysctl_root(struct ctl_table_root *root); 1086void register_sysctl_root(struct ctl_table_root *root);
1087struct ctl_table_header *__register_sysctl_table(
1088 struct ctl_table_set *set,
1089 const char *path, struct ctl_table *table);
1087struct ctl_table_header *__register_sysctl_paths( 1090struct ctl_table_header *__register_sysctl_paths(
1088 struct ctl_table_root *root, struct nsproxy *namespaces, 1091 struct ctl_table_set *set,
1089 const struct ctl_path *path, struct ctl_table *table); 1092 const struct ctl_path *path, struct ctl_table *table);
1093struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table);
1090struct ctl_table_header *register_sysctl_table(struct ctl_table * table); 1094struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
1091struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1095struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1092 struct ctl_table *table); 1096 struct ctl_table *table);
1093 1097
1094void unregister_sysctl_table(struct ctl_table_header * table); 1098void unregister_sysctl_table(struct ctl_table_header * table);
1095int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); 1099
1100extern int sysctl_init(void);
1101#else /* CONFIG_SYSCTL */
1102static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
1103{
1104 return NULL;
1105}
1106
1107static inline struct ctl_table_header *register_sysctl_paths(
1108 const struct ctl_path *path, struct ctl_table *table)
1109{
1110 return NULL;
1111}
1112
1113static inline void unregister_sysctl_table(struct ctl_table_header * table)
1114{
1115}
1116
1117static inline void setup_sysctl_set(struct ctl_table_set *p,
1118 struct ctl_table_root *root,
1119 int (*is_seen)(struct ctl_table_set *))
1120{
1121}
1122
1123#endif /* CONFIG_SYSCTL */
1096 1124
1097#endif /* __KERNEL__ */ 1125#endif /* __KERNEL__ */
1098 1126
diff --git a/kernel/Makefile b/kernel/Makefile
index 2d9de86b7e76..cb41b9547c9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,7 +27,6 @@ obj-y += power/
27 27
28obj-$(CONFIG_FREEZER) += freezer.o 28obj-$(CONFIG_FREEZER) += freezer.o
29obj-$(CONFIG_PROFILING) += profile.o 29obj-$(CONFIG_PROFILING) += profile.o
30obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
31obj-$(CONFIG_STACKTRACE) += stacktrace.o 30obj-$(CONFIG_STACKTRACE) += stacktrace.o
32obj-y += time/ 31obj-y += time/
33obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 32obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 11d53046b905..d48ff4fd44c3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -193,20 +193,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
193 193
194#endif 194#endif
195 195
196static struct ctl_table root_table[];
197static struct ctl_table_root sysctl_table_root;
198static struct ctl_table_header root_table_header = {
199 {{.count = 1,
200 .ctl_table = root_table,
201 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
202 .root = &sysctl_table_root,
203 .set = &sysctl_table_root.default_set,
204};
205static struct ctl_table_root sysctl_table_root = {
206 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
207 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
208};
209
210static struct ctl_table kern_table[]; 196static struct ctl_table kern_table[];
211static struct ctl_table vm_table[]; 197static struct ctl_table vm_table[];
212static struct ctl_table fs_table[]; 198static struct ctl_table fs_table[];
@@ -223,7 +209,7 @@ int sysctl_legacy_va_layout;
223 209
224/* The default sysctl tables: */ 210/* The default sysctl tables: */
225 211
226static struct ctl_table root_table[] = { 212static struct ctl_table sysctl_base_table[] = {
227 { 213 {
228 .procname = "kernel", 214 .procname = "kernel",
229 .mode = 0555, 215 .mode = 0555,
@@ -1560,490 +1546,12 @@ static struct ctl_table dev_table[] = {
1560 { } 1546 { }
1561}; 1547};
1562 1548
1563static DEFINE_SPINLOCK(sysctl_lock); 1549int __init sysctl_init(void)
1564
1565/* called under sysctl_lock */
1566static int use_table(struct ctl_table_header *p)
1567{
1568 if (unlikely(p->unregistering))
1569 return 0;
1570 p->used++;
1571 return 1;
1572}
1573
1574/* called under sysctl_lock */
1575static void unuse_table(struct ctl_table_header *p)
1576{
1577 if (!--p->used)
1578 if (unlikely(p->unregistering))
1579 complete(p->unregistering);
1580}
1581
1582/* called under sysctl_lock, will reacquire if has to wait */
1583static void start_unregistering(struct ctl_table_header *p)
1584{
1585 /*
1586 * if p->used is 0, nobody will ever touch that entry again;
1587 * we'll eliminate all paths to it before dropping sysctl_lock
1588 */
1589 if (unlikely(p->used)) {
1590 struct completion wait;
1591 init_completion(&wait);
1592 p->unregistering = &wait;
1593 spin_unlock(&sysctl_lock);
1594 wait_for_completion(&wait);
1595 spin_lock(&sysctl_lock);
1596 } else {
1597 /* anything non-NULL; we'll never dereference it */
1598 p->unregistering = ERR_PTR(-EINVAL);
1599 }
1600 /*
1601 * do not remove from the list until nobody holds it; walking the
1602 * list in do_sysctl() relies on that.
1603 */
1604 list_del_init(&p->ctl_entry);
1605}
1606
1607void sysctl_head_get(struct ctl_table_header *head)
1608{
1609 spin_lock(&sysctl_lock);
1610 head->count++;
1611 spin_unlock(&sysctl_lock);
1612}
1613
1614void sysctl_head_put(struct ctl_table_header *head)
1615{
1616 spin_lock(&sysctl_lock);
1617 if (!--head->count)
1618 kfree_rcu(head, rcu);
1619 spin_unlock(&sysctl_lock);
1620}
1621
1622struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1623{
1624 if (!head)
1625 BUG();
1626 spin_lock(&sysctl_lock);
1627 if (!use_table(head))
1628 head = ERR_PTR(-ENOENT);
1629 spin_unlock(&sysctl_lock);
1630 return head;
1631}
1632
1633void sysctl_head_finish(struct ctl_table_header *head)
1634{
1635 if (!head)
1636 return;
1637 spin_lock(&sysctl_lock);
1638 unuse_table(head);
1639 spin_unlock(&sysctl_lock);
1640}
1641
1642static struct ctl_table_set *
1643lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1644{
1645 struct ctl_table_set *set = &root->default_set;
1646 if (root->lookup)
1647 set = root->lookup(root, namespaces);
1648 return set;
1649}
1650
1651static struct list_head *
1652lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1653{
1654 struct ctl_table_set *set = lookup_header_set(root, namespaces);
1655 return &set->list;
1656}
1657
1658struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1659 struct ctl_table_header *prev)
1660{
1661 struct ctl_table_root *root;
1662 struct list_head *header_list;
1663 struct ctl_table_header *head;
1664 struct list_head *tmp;
1665
1666 spin_lock(&sysctl_lock);
1667 if (prev) {
1668 head = prev;
1669 tmp = &prev->ctl_entry;
1670 unuse_table(prev);
1671 goto next;
1672 }
1673 tmp = &root_table_header.ctl_entry;
1674 for (;;) {
1675 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1676
1677 if (!use_table(head))
1678 goto next;
1679 spin_unlock(&sysctl_lock);
1680 return head;
1681 next:
1682 root = head->root;
1683 tmp = tmp->next;
1684 header_list = lookup_header_list(root, namespaces);
1685 if (tmp != header_list)
1686 continue;
1687
1688 do {
1689 root = list_entry(root->root_list.next,
1690 struct ctl_table_root, root_list);
1691 if (root == &sysctl_table_root)
1692 goto out;
1693 header_list = lookup_header_list(root, namespaces);
1694 } while (list_empty(header_list));
1695 tmp = header_list->next;
1696 }
1697out:
1698 spin_unlock(&sysctl_lock);
1699 return NULL;
1700}
1701
1702struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1703{
1704 return __sysctl_head_next(current->nsproxy, prev);
1705}
1706
1707void register_sysctl_root(struct ctl_table_root *root)
1708{
1709 spin_lock(&sysctl_lock);
1710 list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1711 spin_unlock(&sysctl_lock);
1712}
1713
1714/*
1715 * sysctl_perm does NOT grant the superuser all rights automatically, because
1716 * some sysctl variables are readonly even to root.
1717 */
1718
1719static int test_perm(int mode, int op)
1720{
1721 if (!current_euid())
1722 mode >>= 6;
1723 else if (in_egroup_p(0))
1724 mode >>= 3;
1725 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1726 return 0;
1727 return -EACCES;
1728}
1729
1730int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1731{
1732 int mode;
1733
1734 if (root->permissions)
1735 mode = root->permissions(root, current->nsproxy, table);
1736 else
1737 mode = table->mode;
1738
1739 return test_perm(mode, op);
1740}
1741
1742static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1743{
1744 for (; table->procname; table++) {
1745 table->parent = parent;
1746 if (table->child)
1747 sysctl_set_parent(table, table->child);
1748 }
1749}
1750
1751static __init int sysctl_init(void)
1752{ 1550{
1753 sysctl_set_parent(NULL, root_table); 1551 register_sysctl_table(sysctl_base_table);
1754#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1755 sysctl_check_table(current->nsproxy, root_table);
1756#endif
1757 return 0; 1552 return 0;
1758} 1553}
1759 1554
1760core_initcall(sysctl_init);
1761
1762static struct ctl_table *is_branch_in(struct ctl_table *branch,
1763 struct ctl_table *table)
1764{
1765 struct ctl_table *p;
1766 const char *s = branch->procname;
1767
1768 /* branch should have named subdirectory as its first element */
1769 if (!s || !branch->child)
1770 return NULL;
1771
1772 /* ... and nothing else */
1773 if (branch[1].procname)
1774 return NULL;
1775
1776 /* table should contain subdirectory with the same name */
1777 for (p = table; p->procname; p++) {
1778 if (!p->child)
1779 continue;
1780 if (p->procname && strcmp(p->procname, s) == 0)
1781 return p;
1782 }
1783 return NULL;
1784}
1785
1786/* see if attaching q to p would be an improvement */
1787static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1788{
1789 struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1790 struct ctl_table *next;
1791 int is_better = 0;
1792 int not_in_parent = !p->attached_by;
1793
1794 while ((next = is_branch_in(by, to)) != NULL) {
1795 if (by == q->attached_by)
1796 is_better = 1;
1797 if (to == p->attached_by)
1798 not_in_parent = 1;
1799 by = by->child;
1800 to = next->child;
1801 }
1802
1803 if (is_better && not_in_parent) {
1804 q->attached_by = by;
1805 q->attached_to = to;
1806 q->parent = p;
1807 }
1808}
1809
1810/**
1811 * __register_sysctl_paths - register a sysctl hierarchy
1812 * @root: List of sysctl headers to register on
1813 * @namespaces: Data to compute which lists of sysctl entries are visible
1814 * @path: The path to the directory the sysctl table is in.
1815 * @table: the top-level table structure
1816 *
1817 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1818 * array. A completely 0 filled entry terminates the table.
1819 *
1820 * The members of the &struct ctl_table structure are used as follows:
1821 *
1822 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1823 * enter a sysctl file
1824 *
1825 * data - a pointer to data for use by proc_handler
1826 *
1827 * maxlen - the maximum size in bytes of the data
1828 *
1829 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1830 *
1831 * child - a pointer to the child sysctl table if this entry is a directory, or
1832 * %NULL.
1833 *
1834 * proc_handler - the text handler routine (described below)
1835 *
1836 * de - for internal use by the sysctl routines
1837 *
1838 * extra1, extra2 - extra pointers usable by the proc handler routines
1839 *
1840 * Leaf nodes in the sysctl tree will be represented by a single file
1841 * under /proc; non-leaf nodes will be represented by directories.
1842 *
1843 * sysctl(2) can automatically manage read and write requests through
1844 * the sysctl table. The data and maxlen fields of the ctl_table
1845 * struct enable minimal validation of the values being written to be
1846 * performed, and the mode field allows minimal authentication.
1847 *
1848 * There must be a proc_handler routine for any terminal nodes
1849 * mirrored under /proc/sys (non-terminals are handled by a built-in
1850 * directory handler). Several default handlers are available to
1851 * cover common cases -
1852 *
1853 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1854 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1855 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1856 *
1857 * It is the handler's job to read the input buffer from user memory
1858 * and process it. The handler should return 0 on success.
1859 *
1860 * This routine returns %NULL on a failure to register, and a pointer
1861 * to the table header on success.
1862 */
1863struct ctl_table_header *__register_sysctl_paths(
1864 struct ctl_table_root *root,
1865 struct nsproxy *namespaces,
1866 const struct ctl_path *path, struct ctl_table *table)
1867{
1868 struct ctl_table_header *header;
1869 struct ctl_table *new, **prevp;
1870 unsigned int n, npath;
1871 struct ctl_table_set *set;
1872
1873 /* Count the path components */
1874 for (npath = 0; path[npath].procname; ++npath)
1875 ;
1876
1877 /*
1878 * For each path component, allocate a 2-element ctl_table array.
1879 * The first array element will be filled with the sysctl entry
1880 * for this, the second will be the sentinel (procname == 0).
1881 *
1882 * We allocate everything in one go so that we don't have to
1883 * worry about freeing additional memory in unregister_sysctl_table.
1884 */
1885 header = kzalloc(sizeof(struct ctl_table_header) +
1886 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1887 if (!header)
1888 return NULL;
1889
1890 new = (struct ctl_table *) (header + 1);
1891
1892 /* Now connect the dots */
1893 prevp = &header->ctl_table;
1894 for (n = 0; n < npath; ++n, ++path) {
1895 /* Copy the procname */
1896 new->procname = path->procname;
1897 new->mode = 0555;
1898
1899 *prevp = new;
1900 prevp = &new->child;
1901
1902 new += 2;
1903 }
1904 *prevp = table;
1905 header->ctl_table_arg = table;
1906
1907 INIT_LIST_HEAD(&header->ctl_entry);
1908 header->used = 0;
1909 header->unregistering = NULL;
1910 header->root = root;
1911 sysctl_set_parent(NULL, header->ctl_table);
1912 header->count = 1;
1913#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1914 if (sysctl_check_table(namespaces, header->ctl_table)) {
1915 kfree(header);
1916 return NULL;
1917 }
1918#endif
1919 spin_lock(&sysctl_lock);
1920 header->set = lookup_header_set(root, namespaces);
1921 header->attached_by = header->ctl_table;
1922 header->attached_to = root_table;
1923 header->parent = &root_table_header;
1924 for (set = header->set; set; set = set->parent) {
1925 struct ctl_table_header *p;
1926 list_for_each_entry(p, &set->list, ctl_entry) {
1927 if (p->unregistering)
1928 continue;
1929 try_attach(p, header);
1930 }
1931 }
1932 header->parent->count++;
1933 list_add_tail(&header->ctl_entry, &header->set->list);
1934 spin_unlock(&sysctl_lock);
1935
1936 return header;
1937}
1938
1939/**
1940 * register_sysctl_table_path - register a sysctl table hierarchy
1941 * @path: The path to the directory the sysctl table is in.
1942 * @table: the top-level table structure
1943 *
1944 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1945 * array. A completely 0 filled entry terminates the table.
1946 *
1947 * See __register_sysctl_paths for more details.
1948 */
1949struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1950 struct ctl_table *table)
1951{
1952 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1953 path, table);
1954}
1955
1956/**
1957 * register_sysctl_table - register a sysctl table hierarchy
1958 * @table: the top-level table structure
1959 *
1960 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1961 * array. A completely 0 filled entry terminates the table.
1962 *
1963 * See register_sysctl_paths for more details.
1964 */
1965struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1966{
1967 static const struct ctl_path null_path[] = { {} };
1968
1969 return register_sysctl_paths(null_path, table);
1970}
1971
1972/**
1973 * unregister_sysctl_table - unregister a sysctl table hierarchy
1974 * @header: the header returned from register_sysctl_table
1975 *
1976 * Unregisters the sysctl table and all children. proc entries may not
1977 * actually be removed until they are no longer used by anyone.
1978 */
1979void unregister_sysctl_table(struct ctl_table_header * header)
1980{
1981 might_sleep();
1982
1983 if (header == NULL)
1984 return;
1985
1986 spin_lock(&sysctl_lock);
1987 start_unregistering(header);
1988 if (!--header->parent->count) {
1989 WARN_ON(1);
1990 kfree_rcu(header->parent, rcu);
1991 }
1992 if (!--header->count)
1993 kfree_rcu(header, rcu);
1994 spin_unlock(&sysctl_lock);
1995}
1996
1997int sysctl_is_seen(struct ctl_table_header *p)
1998{
1999 struct ctl_table_set *set = p->set;
2000 int res;
2001 spin_lock(&sysctl_lock);
2002 if (p->unregistering)
2003 res = 0;
2004 else if (!set->is_seen)
2005 res = 1;
2006 else
2007 res = set->is_seen(set);
2008 spin_unlock(&sysctl_lock);
2009 return res;
2010}
2011
2012void setup_sysctl_set(struct ctl_table_set *p,
2013 struct ctl_table_set *parent,
2014 int (*is_seen)(struct ctl_table_set *))
2015{
2016 INIT_LIST_HEAD(&p->list);
2017 p->parent = parent ? parent : &sysctl_table_root.default_set;
2018 p->is_seen = is_seen;
2019}
2020
2021#else /* !CONFIG_SYSCTL */
/* !CONFIG_SYSCTL stub: nothing is registered, always returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	(void)table;

	return NULL;
}
2026
/* !CONFIG_SYSCTL stub: nothing is registered, always returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	(void)path;
	(void)table;

	return NULL;
}
2032
/* !CONFIG_SYSCTL stub: there is nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
	(void)table;
}
2036
/* !CONFIG_SYSCTL stub: the set is left untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
	(void)p;
	(void)parent;
	(void)is_seen;
}
2042
/* !CONFIG_SYSCTL stub: does nothing. */
void sysctl_head_put(struct ctl_table_header *head)
{
	(void)head;
}
2046
2047#endif /* CONFIG_SYSCTL */ 1555#endif /* CONFIG_SYSCTL */
2048 1556
2049/* 1557/*
@@ -3009,6 +2517,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3009EXPORT_SYMBOL(proc_dostring); 2517EXPORT_SYMBOL(proc_dostring);
3010EXPORT_SYMBOL(proc_doulongvec_minmax); 2518EXPORT_SYMBOL(proc_doulongvec_minmax);
3011EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2519EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3012EXPORT_SYMBOL(register_sysctl_table);
3013EXPORT_SYMBOL(register_sysctl_paths);
3014EXPORT_SYMBOL(unregister_sysctl_table);
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
deleted file mode 100644
index 362da653813d..000000000000
--- a/kernel/sysctl_check.c
+++ /dev/null
@@ -1,160 +0,0 @@
1#include <linux/stat.h>
2#include <linux/sysctl.h>
3#include "../fs/xfs/xfs_sysctl.h"
4#include <linux/sunrpc/debug.h>
5#include <linux/string.h>
6#include <net/ip_vs.h>
7
8
9static int sysctl_depth(struct ctl_table *table)
10{
11 struct ctl_table *tmp;
12 int depth;
13
14 depth = 0;
15 for (tmp = table; tmp->parent; tmp = tmp->parent)
16 depth++;
17
18 return depth;
19}
20
21static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
22{
23 int i;
24
25 for (i = 0; table && i < n; i++)
26 table = table->parent;
27
28 return table;
29}
30
31
32static void sysctl_print_path(struct ctl_table *table)
33{
34 struct ctl_table *tmp;
35 int depth, i;
36 depth = sysctl_depth(table);
37 if (table->procname) {
38 for (i = depth; i >= 0; i--) {
39 tmp = sysctl_parent(table, i);
40 printk("/%s", tmp->procname?tmp->procname:"");
41 }
42 }
43 printk(" ");
44}
45
46static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
47 struct ctl_table *table)
48{
49 struct ctl_table_header *head;
50 struct ctl_table *ref, *test;
51 int depth, cur_depth;
52
53 depth = sysctl_depth(table);
54
55 for (head = __sysctl_head_next(namespaces, NULL); head;
56 head = __sysctl_head_next(namespaces, head)) {
57 cur_depth = depth;
58 ref = head->ctl_table;
59repeat:
60 test = sysctl_parent(table, cur_depth);
61 for (; ref->procname; ref++) {
62 int match = 0;
63 if (cur_depth && !ref->child)
64 continue;
65
66 if (test->procname && ref->procname &&
67 (strcmp(test->procname, ref->procname) == 0))
68 match++;
69
70 if (match) {
71 if (cur_depth != 0) {
72 cur_depth--;
73 ref = ref->child;
74 goto repeat;
75 }
76 goto out;
77 }
78 }
79 }
80 ref = NULL;
81out:
82 sysctl_head_finish(head);
83 return ref;
84}
85
86static void set_fail(const char **fail, struct ctl_table *table, const char *str)
87{
88 if (*fail) {
89 printk(KERN_ERR "sysctl table check failed: ");
90 sysctl_print_path(table);
91 printk(" %s\n", *fail);
92 dump_stack();
93 }
94 *fail = str;
95}
96
97static void sysctl_check_leaf(struct nsproxy *namespaces,
98 struct ctl_table *table, const char **fail)
99{
100 struct ctl_table *ref;
101
102 ref = sysctl_check_lookup(namespaces, table);
103 if (ref && (ref != table))
104 set_fail(fail, table, "Sysctl already exists");
105}
106
107int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
108{
109 int error = 0;
110 for (; table->procname; table++) {
111 const char *fail = NULL;
112
113 if (table->parent) {
114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname");
116 }
117 if (table->child) {
118 if (table->data)
119 set_fail(&fail, table, "Directory with data?");
120 if (table->maxlen)
121 set_fail(&fail, table, "Directory with maxlen?");
122 if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
123 set_fail(&fail, table, "Writable sysctl directory");
124 if (table->proc_handler)
125 set_fail(&fail, table, "Directory with proc_handler");
126 if (table->extra1)
127 set_fail(&fail, table, "Directory with extra1");
128 if (table->extra2)
129 set_fail(&fail, table, "Directory with extra2");
130 } else {
131 if ((table->proc_handler == proc_dostring) ||
132 (table->proc_handler == proc_dointvec) ||
133 (table->proc_handler == proc_dointvec_minmax) ||
134 (table->proc_handler == proc_dointvec_jiffies) ||
135 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
136 (table->proc_handler == proc_dointvec_ms_jiffies) ||
137 (table->proc_handler == proc_doulongvec_minmax) ||
138 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
139 if (!table->data)
140 set_fail(&fail, table, "No data");
141 if (!table->maxlen)
142 set_fail(&fail, table, "No maxlen");
143 }
144#ifdef CONFIG_PROC_SYSCTL
145 if (!table->proc_handler)
146 set_fail(&fail, table, "No proc_handler");
147#endif
148 sysctl_check_leaf(namespaces, table, &fail);
149 }
150 if (table->mode > 0777)
151 set_fail(&fail, table, "bogus .mode");
152 if (fail) {
153 set_fail(&fail, table, NULL);
154 error = -EINVAL;
155 }
156 if (table->child)
157 error |= sysctl_check_table(namespaces, table->child);
158 }
159 return error;
160}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 391003f7ab46..f7af95d304c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1141,14 +1141,6 @@ config LATENCYTOP
1141 Enable this option if you want to use the LatencyTOP tool 1141 Enable this option if you want to use the LatencyTOP tool
1142 to find out which userspace is blocking on what kernel operations. 1142 to find out which userspace is blocking on what kernel operations.
1143 1143
1144config SYSCTL_SYSCALL_CHECK
1145 bool "Sysctl checks"
1146 depends on SYSCTL
1147 ---help---
1148 sys_sysctl uses binary paths that have been found challenging
1149 to properly maintain and use. This enables checks that help
1150 you to keep things correct.
1151
1152source mm/Kconfig.debug 1144source mm/Kconfig.debug
1153source kernel/trace/Kconfig 1145source kernel/trace/Kconfig
1154 1146
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e75813904f26..c3e65aebecc0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -74,15 +74,13 @@ static struct ctl_table_root net_sysctl_ro_root = {
74 74
75static int __net_init sysctl_net_init(struct net *net) 75static int __net_init sysctl_net_init(struct net *net)
76{ 76{
77 setup_sysctl_set(&net->sysctls, 77 setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen);
78 &net_sysctl_ro_root.default_set,
79 is_seen);
80 return 0; 78 return 0;
81} 79}
82 80
83static void __net_exit sysctl_net_exit(struct net *net) 81static void __net_exit sysctl_net_exit(struct net *net)
84{ 82{
85 WARN_ON(!list_empty(&net->sysctls.list)); 83 retire_sysctl_set(&net->sysctls);
86} 84}
87 85
88static struct pernet_operations sysctl_pernet_ops = { 86static struct pernet_operations sysctl_pernet_ops = {
@@ -90,36 +88,32 @@ static struct pernet_operations sysctl_pernet_ops = {
90 .exit = sysctl_net_exit, 88 .exit = sysctl_net_exit,
91}; 89};
92 90
93static __init int sysctl_init(void) 91static __init int net_sysctl_init(void)
94{ 92{
95 int ret; 93 int ret;
96 ret = register_pernet_subsys(&sysctl_pernet_ops); 94 ret = register_pernet_subsys(&sysctl_pernet_ops);
97 if (ret) 95 if (ret)
98 goto out; 96 goto out;
99 register_sysctl_root(&net_sysctl_root); 97 setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL);
100 setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL);
101 register_sysctl_root(&net_sysctl_ro_root); 98 register_sysctl_root(&net_sysctl_ro_root);
99 register_sysctl_root(&net_sysctl_root);
102out: 100out:
103 return ret; 101 return ret;
104} 102}
105subsys_initcall(sysctl_init); 103subsys_initcall(net_sysctl_init);
106 104
107struct ctl_table_header *register_net_sysctl_table(struct net *net, 105struct ctl_table_header *register_net_sysctl_table(struct net *net,
108 const struct ctl_path *path, struct ctl_table *table) 106 const struct ctl_path *path, struct ctl_table *table)
109{ 107{
110 struct nsproxy namespaces; 108 return __register_sysctl_paths(&net->sysctls, path, table);
111 namespaces = *current->nsproxy;
112 namespaces.net_ns = net;
113 return __register_sysctl_paths(&net_sysctl_root,
114 &namespaces, path, table);
115} 109}
116EXPORT_SYMBOL_GPL(register_net_sysctl_table); 110EXPORT_SYMBOL_GPL(register_net_sysctl_table);
117 111
118struct ctl_table_header *register_net_sysctl_rotable(const 112struct ctl_table_header *register_net_sysctl_rotable(const
119 struct ctl_path *path, struct ctl_table *table) 113 struct ctl_path *path, struct ctl_table *table)
120{ 114{
121 return __register_sysctl_paths(&net_sysctl_ro_root, 115 return __register_sysctl_paths(&net_sysctl_ro_root.default_set,
122 &init_nsproxy, path, table); 116 path, table);
123} 117}
124EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); 118EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
125 119