aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric W. Biederman <ebiederm@xmission.com> 2007-02-14 03:34:12 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-02-14 11:10:00 -0500
commit 77b14db502cb85a031fe8fde6c85d52f3e0acb63 (patch)
tree 4201f6a4dfe1062d1dc00659c403d630401b87cc
parent 1ff007eb8e8c7c44e9a384a67d0fdd0fd06ba811 (diff)
[PATCH] sysctl: reimplement the sysctl proc support
With this change the sysctl inodes can be cached and nothing needs to be done when removing a sysctl table.

For a cost of 2K code we will save about 4K of static tables (when we remove de from ctl_table) and 70K in proc_dir_entries that we will not allocate, or about half that on a 32bit arch.

The speed feels about the same, even though we can now cache the sysctl dentries :(

We get the core advantage that we don't need to have a 1 to 1 mapping between ctl table entries and proc files. Making it possible to have /proc/sys vary depending on the namespace you are in. The currently merged namespaces don't have an issue here but the network namespace under /proc/sys/net needs to have different directories depending on which network adapters are visible. By simply being a cache different directories being visible depending on who you are is trivial to implement.

[akpm@osdl.org: fix uninitialised var]
[akpm@osdl.org: fix ARM build]
[bunk@stusta.de: make things static]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/proc_sysctl.c478
-rw-r--r--fs/proc/root.c10
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--init/main.c4
-rw-r--r--kernel/sysctl.c182
10 files changed, 486 insertions, 199 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index f6c776272572..a6b3a8f878f0 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o proc_misc.o 11 proc_tty.o proc_misc.o proc_sysctl.o
12 12
13proc-$(CONFIG_PROC_KCORE) += kcore.o 13proc-$(CONFIG_PROC_KCORE) += kcore.o
14proc-$(CONFIG_PROC_VMCORE) += vmcore.o 14proc-$(CONFIG_PROC_VMCORE) += vmcore.o
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0cdc00d9d97e..775fb21294d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -32,7 +32,7 @@ static loff_t proc_file_lseek(struct file *, loff_t, int);
32 32
33DEFINE_SPINLOCK(proc_subdir_lock); 33DEFINE_SPINLOCK(proc_subdir_lock);
34 34
35int proc_match(int len, const char *name, struct proc_dir_entry *de) 35static int proc_match(int len, const char *name, struct proc_dir_entry *de)
36{ 36{
37 if (de->namelen != len) 37 if (de->namelen != len)
38 return 0; 38 return 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index f6722be37dde..c372eb151a3a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -161,6 +161,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
161 if (!inode) 161 if (!inode)
162 goto out_ino; 162 goto out_ino;
163 163
164 PROC_I(inode)->fd = 0;
164 PROC_I(inode)->pde = de; 165 PROC_I(inode)->pde = de;
165 if (de) { 166 if (de) {
166 if (de->mode) { 167 if (de->mode) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 277dcd66ebe2..c932aa65e198 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,8 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern int proc_sys_init(void);
15
14struct vmalloc_info { 16struct vmalloc_info {
15 unsigned long used; 17 unsigned long used;
16 unsigned long largest_chunk; 18 unsigned long largest_chunk;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
new file mode 100644
index 000000000000..bb16a1e78826
--- /dev/null
+++ b/fs/proc/proc_sysctl.c
@@ -0,0 +1,478 @@
1/*
2 * /proc/sys support
3 */
4
5#include <linux/sysctl.h>
6#include <linux/proc_fs.h>
7#include <linux/security.h>
8#include "internal.h"
9
10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations;
12static struct inode_operations proc_sys_inode_operations;
13
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
15{
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{
33 struct inode *inode;
34 struct proc_inode *dir_ei, *ei;
35 int depth;
36
37 inode = new_inode(dir->i_sb);
38 if (!inode)
39 goto out;
40
41 /* A directory is always one deeper than it's parent */
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode);
46 ei->fd = depth;
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 proc_sys_refresh_inode(inode, table);
51out:
52 return inode;
53}
54
55static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
56{
57 for (;;) {
58 struct proc_inode *ei;
59
60 ei = PROC_I(dentry->d_inode);
61 if (ei->fd == depth)
62 break; /* found */
63
64 dentry = dentry->d_parent;
65 }
66 return dentry;
67}
68
69static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
70 struct qstr *name)
71{
72 int len;
73 for ( ; table->ctl_name || table->procname; table++) {
74
75 if (!table->procname)
76 continue;
77
78 len = strlen(table->procname);
79 if (len != name->len)
80 continue;
81
82 if (memcmp(table->procname, name->name, len) != 0)
83 continue;
84
85 /* I have a match */
86 return table;
87 }
88 return NULL;
89}
90
91static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
92 struct ctl_table *table)
93{
94 struct dentry *ancestor;
95 struct proc_inode *ei;
96 int depth, i;
97
98 ei = PROC_I(dentry->d_inode);
99 depth = ei->fd;
100
101 if (depth == 0)
102 return table;
103
104 for (i = 1; table && (i <= depth); i++) {
105 ancestor = proc_sys_ancestor(dentry, i);
106 table = proc_sys_lookup_table_one(table, &ancestor->d_name);
107 if (table)
108 table = table->child;
109 }
110 return table;
111
112}
113static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
114 struct qstr *name,
115 struct ctl_table *table)
116{
117 table = proc_sys_lookup_table(dparent, table);
118 if (table)
119 table = proc_sys_lookup_table_one(table, name);
120 return table;
121}
122
123static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
124 struct qstr *name,
125 struct ctl_table_header **ptr)
126{
127 struct ctl_table_header *head;
128 struct ctl_table *table = NULL;
129
130 for (head = sysctl_head_next(NULL); head;
131 head = sysctl_head_next(head)) {
132 table = proc_sys_lookup_entry(parent, name, head->ctl_table);
133 if (table)
134 break;
135 }
136 *ptr = head;
137 return table;
138}
139
140static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
141 struct nameidata *nd)
142{
143 struct ctl_table_header *head;
144 struct inode *inode;
145 struct dentry *err;
146 struct ctl_table *table;
147
148 err = ERR_PTR(-ENOENT);
149 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
150 if (!table)
151 goto out;
152
153 err = ERR_PTR(-ENOMEM);
154 inode = proc_sys_make_inode(dir, table);
155 if (!inode)
156 goto out;
157
158 err = NULL;
159 dentry->d_op = &proc_sys_dentry_operations;
160 d_add(dentry, inode);
161
162out:
163 sysctl_head_finish(head);
164 return err;
165}
166
167static ssize_t proc_sys_read(struct file *filp, char __user *buf,
168 size_t count, loff_t *ppos)
169{
170 struct dentry *dentry = filp->f_dentry;
171 struct ctl_table_header *head;
172 struct ctl_table *table;
173 ssize_t error, res;
174
175 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
176 /* Has the sysctl entry disappeared on us? */
177 error = -ENOENT;
178 if (!table)
179 goto out;
180
181 /* Has the sysctl entry been replaced by a directory? */
182 error = -EISDIR;
183 if (!table->proc_handler)
184 goto out;
185
186 /*
187 * At this point we know that the sysctl was not unregistered
188 * and won't be until we finish.
189 */
190 error = -EPERM;
191 if (sysctl_perm(table, MAY_READ))
192 goto out;
193
194 /* careful: calling conventions are nasty here */
195 res = count;
196 error = table->proc_handler(table, 0, filp, buf, &res, ppos);
197 if (!error)
198 error = res;
199out:
200 sysctl_head_finish(head);
201
202 return error;
203}
204
205static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
206 size_t count, loff_t *ppos)
207{
208 struct dentry *dentry = filp->f_dentry;
209 struct ctl_table_header *head;
210 struct ctl_table *table;
211 ssize_t error, res;
212
213 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
214 /* Has the sysctl entry disappeared on us? */
215 error = -ENOENT;
216 if (!table)
217 goto out;
218
219 /* Has the sysctl entry been replaced by a directory? */
220 error = -EISDIR;
221 if (!table->proc_handler)
222 goto out;
223
224 /*
225 * At this point we know that the sysctl was not unregistered
226 * and won't be until we finish.
227 */
228 error = -EPERM;
229 if (sysctl_perm(table, MAY_WRITE))
230 goto out;
231
232 /* careful: calling conventions are nasty here */
233 res = count;
234 error = table->proc_handler(table, 1, filp, (char __user *)buf,
235 &res, ppos);
236 if (!error)
237 error = res;
238out:
239 sysctl_head_finish(head);
240
241 return error;
242}
243
244
245static int proc_sys_fill_cache(struct file *filp, void *dirent,
246 filldir_t filldir, struct ctl_table *table)
247{
248 struct ctl_table_header *head;
249 struct ctl_table *child_table = NULL;
250 struct dentry *child, *dir = filp->f_path.dentry;
251 struct inode *inode;
252 struct qstr qname;
253 ino_t ino = 0;
254 unsigned type = DT_UNKNOWN;
255 int ret;
256
257 qname.name = table->procname;
258 qname.len = strlen(table->procname);
259 qname.hash = full_name_hash(qname.name, qname.len);
260
261 /* Suppress duplicates.
262 * Only fill a directory entry if it is the value that
263 * an ordinary lookup of that name returns. Hide all
264 * others.
265 *
266 * If we ever cache this translation in the dcache
267 * I should do a dcache lookup first. But for now
268 * it is just simpler not to.
269 */
270 ret = 0;
271 child_table = do_proc_sys_lookup(dir, &qname, &head);
272 sysctl_head_finish(head);
273 if (child_table != table)
274 return 0;
275
276 child = d_lookup(dir, &qname);
277 if (!child) {
278 struct dentry *new;
279 new = d_alloc(dir, &qname);
280 if (new) {
281 inode = proc_sys_make_inode(dir->d_inode, table);
282 if (!inode)
283 child = ERR_PTR(-ENOMEM);
284 else {
285 new->d_op = &proc_sys_dentry_operations;
286 d_add(new, inode);
287 }
288 if (child)
289 dput(new);
290 else
291 child = new;
292 }
293 }
294 if (!child || IS_ERR(child) || !child->d_inode)
295 goto end_instantiate;
296 inode = child->d_inode;
297 if (inode) {
298 ino = inode->i_ino;
299 type = inode->i_mode >> 12;
300 }
301 dput(child);
302end_instantiate:
303 if (!ino)
304 ino= find_inode_number(dir, &qname);
305 if (!ino)
306 ino = 1;
307 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
308}
309
310static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311{
312 struct dentry *dentry = filp->f_dentry;
313 struct inode *inode = dentry->d_inode;
314 struct ctl_table_header *head = NULL;
315 struct ctl_table *table;
316 unsigned long pos;
317 int ret;
318
319 ret = -ENOTDIR;
320 if (!S_ISDIR(inode->i_mode))
321 goto out;
322
323 ret = 0;
324 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
325 if (filp->f_pos == 0) {
326 if (filldir(dirent, ".", 1, filp->f_pos,
327 inode->i_ino, DT_DIR) < 0)
328 goto out;
329 filp->f_pos++;
330 }
331 if (filp->f_pos == 1) {
332 if (filldir(dirent, "..", 2, filp->f_pos,
333 parent_ino(dentry), DT_DIR) < 0)
334 goto out;
335 filp->f_pos++;
336 }
337 pos = 2;
338
339 /* - Find each instance of the directory
340 * - Read all entries in each instance
341 * - Before returning an entry to user space lookup the entry
342 * by name and if I find a different entry don't return
343 * this one because it means it is a buried dup.
344 * For sysctl this should only happen for directory entries.
345 */
346 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
347 table = proc_sys_lookup_table(dentry, head->ctl_table);
348
349 if (!table)
350 continue;
351
352 for (; table->ctl_name || table->procname; table++, pos++) {
353 /* Can't do anything without a proc name */
354 if (!table->procname)
355 continue;
356
357 if (pos < filp->f_pos)
358 continue;
359
360 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
361 goto out;
362 filp->f_pos = pos + 1;
363 }
364 }
365 ret = 1;
366out:
367 sysctl_head_finish(head);
368 return ret;
369}
370
371static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
372{
373 /*
374 * sysctl entries that are not writeable,
375 * are _NOT_ writeable, capabilities or not.
376 */
377 struct ctl_table_header *head;
378 struct ctl_table *table;
379 struct dentry *dentry;
380 int mode;
381 int depth;
382 int error;
383
384 head = NULL;
385 depth = PROC_I(inode)->fd;
386
387 /* First check the cached permissions, in case we don't have
388 * enough information to lookup the sysctl table entry.
389 */
390 error = -EACCES;
391 mode = inode->i_mode;
392
393 if (current->euid == 0)
394 mode >>= 6;
395 else if (in_group_p(0))
396 mode >>= 3;
397
398 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
399 error = 0;
400
401 /* If we can't get a sysctl table entry the permission
402 * checks on the cached mode will have to be enough.
403 */
404 if (!nd || !depth)
405 goto out;
406
407 dentry = nd->dentry;
408 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
409
410 /* If the entry does not exist deny permission */
411 error = -EACCES;
412 if (!table)
413 goto out;
414
415 /* Use the permissions on the sysctl table entry */
416 error = sysctl_perm(table, mask);
417out:
418 sysctl_head_finish(head);
419 return error;
420}
421
422static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
423{
424 struct inode *inode = dentry->d_inode;
425 int error;
426
427 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
428 return -EPERM;
429
430 error = inode_change_ok(inode, attr);
431 if (!error) {
432 error = security_inode_setattr(dentry, attr);
433 if (!error)
434 error = inode_setattr(inode, attr);
435 }
436
437 return error;
438}
439
440/* I'm lazy and don't distinguish between files and directories,
441 * until access time.
442 */
443static const struct file_operations proc_sys_file_operations = {
444 .read = proc_sys_read,
445 .write = proc_sys_write,
446 .readdir = proc_sys_readdir,
447};
448
449static struct inode_operations proc_sys_inode_operations = {
450 .lookup = proc_sys_lookup,
451 .permission = proc_sys_permission,
452 .setattr = proc_sys_setattr,
453};
454
455static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
456{
457 struct ctl_table_header *head;
458 struct ctl_table *table;
459 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
460 proc_sys_refresh_inode(dentry->d_inode, table);
461 sysctl_head_finish(head);
462 return !!table;
463}
464
465static struct dentry_operations proc_sys_dentry_operations = {
466 .d_revalidate = proc_sys_revalidate,
467};
468
469static struct proc_dir_entry *proc_sys_root;
470
471int proc_sys_init(void)
472{
473 proc_sys_root = proc_mkdir("sys", NULL);
474 proc_sys_root->proc_iops = &proc_sys_inode_operations;
475 proc_sys_root->proc_fops = &proc_sys_file_operations;
476 proc_sys_root->nlink = 0;
477 return 0;
478}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 6ae222b509ce..5834a744c2a9 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,10 +23,6 @@
23 23
24struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; 24struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
25 25
26#ifdef CONFIG_SYSCTL
27struct proc_dir_entry *proc_sys_root;
28#endif
29
30static int proc_get_sb(struct file_system_type *fs_type, 26static int proc_get_sb(struct file_system_type *fs_type,
31 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 27 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
32{ 28{
@@ -71,9 +67,6 @@ void __init proc_root_init(void)
71#ifdef CONFIG_SYSVIPC 67#ifdef CONFIG_SYSVIPC
72 proc_mkdir("sysvipc", NULL); 68 proc_mkdir("sysvipc", NULL);
73#endif 69#endif
74#ifdef CONFIG_SYSCTL
75 proc_sys_root = proc_mkdir("sys", NULL);
76#endif
77 proc_root_fs = proc_mkdir("fs", NULL); 70 proc_root_fs = proc_mkdir("fs", NULL);
78 proc_root_driver = proc_mkdir("driver", NULL); 71 proc_root_driver = proc_mkdir("driver", NULL);
79 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 72 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
@@ -86,6 +79,9 @@ void __init proc_root_init(void)
86 proc_device_tree_init(); 79 proc_device_tree_init();
87#endif 80#endif
88 proc_bus = proc_mkdir("bus", NULL); 81 proc_bus = proc_mkdir("bus", NULL);
82#ifdef CONFIG_SYSCTL
83 proc_sys_init();
84#endif
89} 85}
90 86
91static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat 87static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 2e132473cbe5..be4652a0545a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -113,8 +113,6 @@ extern struct vfsmount *proc_mnt;
113extern int proc_fill_super(struct super_block *,void *,int); 113extern int proc_fill_super(struct super_block *,void *,int);
114extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); 114extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
115 115
116extern int proc_match(int, const char *,struct proc_dir_entry *);
117
118/* 116/*
119 * These are generic /proc routines that use the internal 117 * These are generic /proc routines that use the internal
120 * "struct proc_dir_entry" tree to traverse the filesystem. 118 * "struct proc_dir_entry" tree to traverse the filesystem.
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9698ac30f553..89150494bd10 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,8 +930,6 @@ extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev);
930extern void sysctl_head_finish(struct ctl_table_header *prev); 930extern void sysctl_head_finish(struct ctl_table_header *prev);
931extern int sysctl_perm(struct ctl_table *table, int op); 931extern int sysctl_perm(struct ctl_table *table, int op);
932 932
933extern void sysctl_init(void);
934
935typedef struct ctl_table ctl_table; 933typedef struct ctl_table ctl_table;
936 934
937typedef int ctl_handler (ctl_table *table, int __user *name, int nlen, 935typedef int ctl_handler (ctl_table *table, int __user *name, int nlen,
diff --git a/init/main.c b/init/main.c
index a20a5138211f..649ab5443d43 100644
--- a/init/main.c
+++ b/init/main.c
@@ -86,7 +86,6 @@ extern void init_IRQ(void);
86extern void fork_init(unsigned long); 86extern void fork_init(unsigned long);
87extern void mca_init(void); 87extern void mca_init(void);
88extern void sbus_init(void); 88extern void sbus_init(void);
89extern void sysctl_init(void);
90extern void signals_init(void); 89extern void signals_init(void);
91extern void pidhash_init(void); 90extern void pidhash_init(void);
92extern void pidmap_init(void); 91extern void pidmap_init(void);
@@ -702,9 +701,6 @@ static void __init do_basic_setup(void)
702 usermodehelper_init(); 701 usermodehelper_init();
703 driver_init(); 702 driver_init();
704 703
705#ifdef CONFIG_SYSCTL
706 sysctl_init();
707#endif
708#ifdef CONFIG_PROC_FS 704#ifdef CONFIG_PROC_FS
709 init_irq_proc(); 705 init_irq_proc();
710#endif 706#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6bbac5ce75ed..b3ee791ad663 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -159,26 +159,6 @@ int sysctl_legacy_va_layout;
159#endif 159#endif
160 160
161 161
162/* /proc declarations: */
163
164#ifdef CONFIG_PROC_SYSCTL
165
166static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
167static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
168static int proc_opensys(struct inode *, struct file *);
169
170const struct file_operations proc_sys_file_operations = {
171 .open = proc_opensys,
172 .read = proc_readsys,
173 .write = proc_writesys,
174};
175
176extern struct proc_dir_entry *proc_sys_root;
177
178static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
179static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
180#endif
181
182/* The default sysctl tables: */ 162/* The default sysctl tables: */
183 163
184static ctl_table root_table[] = { 164static ctl_table root_table[] = {
@@ -1106,13 +1086,6 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1106 return NULL; 1086 return NULL;
1107} 1087}
1108 1088
1109void __init sysctl_init(void)
1110{
1111#ifdef CONFIG_PROC_SYSCTL
1112 register_proc_table(root_table, proc_sys_root, &root_table_header);
1113#endif
1114}
1115
1116#ifdef CONFIG_SYSCTL_SYSCALL 1089#ifdef CONFIG_SYSCTL_SYSCALL
1117int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, 1090int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1118 void __user *newval, size_t newlen) 1091 void __user *newval, size_t newlen)
@@ -1348,9 +1321,6 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table)
1348 spin_lock(&sysctl_lock); 1321 spin_lock(&sysctl_lock);
1349 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); 1322 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1350 spin_unlock(&sysctl_lock); 1323 spin_unlock(&sysctl_lock);
1351#ifdef CONFIG_PROC_SYSCTL
1352 register_proc_table(table, proc_sys_root, tmp);
1353#endif
1354 return tmp; 1324 return tmp;
1355} 1325}
1356 1326
@@ -1366,9 +1336,6 @@ void unregister_sysctl_table(struct ctl_table_header * header)
1366 might_sleep(); 1336 might_sleep();
1367 spin_lock(&sysctl_lock); 1337 spin_lock(&sysctl_lock);
1368 start_unregistering(header); 1338 start_unregistering(header);
1369#ifdef CONFIG_PROC_SYSCTL
1370 unregister_proc_table(header->ctl_table, proc_sys_root);
1371#endif
1372 spin_unlock(&sysctl_lock); 1339 spin_unlock(&sysctl_lock);
1373 kfree(header); 1340 kfree(header);
1374} 1341}
@@ -1392,155 +1359,6 @@ void unregister_sysctl_table(struct ctl_table_header * table)
1392 1359
1393#ifdef CONFIG_PROC_SYSCTL 1360#ifdef CONFIG_PROC_SYSCTL
1394 1361
1395/* Scan the sysctl entries in table and add them all into /proc */
1396static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1397{
1398 struct proc_dir_entry *de;
1399 int len;
1400 mode_t mode;
1401
1402 for (; table->ctl_name || table->procname; table++) {
1403 /* Can't do anything without a proc name. */
1404 if (!table->procname)
1405 continue;
1406 /* Maybe we can't do anything with it... */
1407 if (!table->proc_handler && !table->child) {
1408 printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1409 table->procname);
1410 continue;
1411 }
1412
1413 len = strlen(table->procname);
1414 mode = table->mode;
1415
1416 de = NULL;
1417 if (table->proc_handler)
1418 mode |= S_IFREG;
1419 else {
1420 mode |= S_IFDIR;
1421 for (de = root->subdir; de; de = de->next) {
1422 if (proc_match(len, table->procname, de))
1423 break;
1424 }
1425 /* If the subdir exists already, de is non-NULL */
1426 }
1427
1428 if (!de) {
1429 de = create_proc_entry(table->procname, mode, root);
1430 if (!de)
1431 continue;
1432 de->set = set;
1433 de->data = (void *) table;
1434 if (table->proc_handler)
1435 de->proc_fops = &proc_sys_file_operations;
1436 }
1437 table->de = de;
1438 if (de->mode & S_IFDIR)
1439 register_proc_table(table->child, de, set);
1440 }
1441}
1442
1443/*
1444 * Unregister a /proc sysctl table and any subdirectories.
1445 */
1446static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1447{
1448 struct proc_dir_entry *de;
1449 for (; table->ctl_name || table->procname; table++) {
1450 if (!(de = table->de))
1451 continue;
1452 if (de->mode & S_IFDIR) {
1453 if (!table->child) {
1454 printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1455 continue;
1456 }
1457 unregister_proc_table(table->child, de);
1458
1459 /* Don't unregister directories which still have entries.. */
1460 if (de->subdir)
1461 continue;
1462 }
1463
1464 /*
1465 * In any case, mark the entry as goner; we'll keep it
1466 * around if it's busy, but we'll know to do nothing with
1467 * its fields. We are under sysctl_lock here.
1468 */
1469 de->data = NULL;
1470
1471 /* Don't unregister proc entries that are still being used.. */
1472 if (atomic_read(&de->count))
1473 continue;
1474
1475 table->de = NULL;
1476 remove_proc_entry(table->procname, root);
1477 }
1478}
1479
1480static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1481 size_t count, loff_t *ppos)
1482{
1483 int op;
1484 struct proc_dir_entry *de = PDE(file->f_path.dentry->d_inode);
1485 struct ctl_table *table;
1486 size_t res;
1487 ssize_t error = -ENOTDIR;
1488
1489 spin_lock(&sysctl_lock);
1490 if (de && de->data && use_table(de->set)) {
1491 /*
1492 * at that point we know that sysctl was not unregistered
1493 * and won't be until we finish
1494 */
1495 spin_unlock(&sysctl_lock);
1496 table = (struct ctl_table *) de->data;
1497 if (!table || !table->proc_handler)
1498 goto out;
1499 error = -EPERM;
1500 op = (write ? 002 : 004);
1501 if (sysctl_perm(table, op))
1502 goto out;
1503
1504 /* careful: calling conventions are nasty here */
1505 res = count;
1506 error = (*table->proc_handler)(table, write, file,
1507 buf, &res, ppos);
1508 if (!error)
1509 error = res;
1510 out:
1511 spin_lock(&sysctl_lock);
1512 unuse_table(de->set);
1513 }
1514 spin_unlock(&sysctl_lock);
1515 return error;
1516}
1517
1518static int proc_opensys(struct inode *inode, struct file *file)
1519{
1520 if (file->f_mode & FMODE_WRITE) {
1521 /*
1522 * sysctl entries that are not writable,
1523 * are _NOT_ writable, capabilities or not.
1524 */
1525 if (!(inode->i_mode & S_IWUSR))
1526 return -EPERM;
1527 }
1528
1529 return 0;
1530}
1531
1532static ssize_t proc_readsys(struct file * file, char __user * buf,
1533 size_t count, loff_t *ppos)
1534{
1535 return do_rw_proc(0, file, buf, count, ppos);
1536}
1537
1538static ssize_t proc_writesys(struct file * file, const char __user * buf,
1539 size_t count, loff_t *ppos)
1540{
1541 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1542}
1543
1544static int _proc_do_string(void* data, int maxlen, int write, 1362static int _proc_do_string(void* data, int maxlen, int write,
1545 struct file *filp, void __user *buffer, 1363 struct file *filp, void __user *buffer,
1546 size_t *lenp, loff_t *ppos) 1364 size_t *lenp, loff_t *ppos)